Commit 1168d3f

committed
more documentation updated.
1 parent 2e41b5b commit 1168d3f

File tree

3 files changed: 65 additions, 3 deletions

Assets/UnityTensorflow/Learning/Mimic/SupervisedLearningModel.cs

Lines changed: 13 additions & 3 deletions

@@ -24,12 +24,22 @@
     public interface ISupervisedLearningModel
     {
         /// <summary>
-        ///
+        /// Evaluate the desired actions for the current states.
         /// </summary>
-        /// <param name="vectorObservation"></param>
-        /// <param name="visualObservation"></param>
+        /// <param name="vectorObservation">Batched vector observations.</param>
+        /// <param name="visualObservation">List of batched visual observations.</param>
+        /// <param name="actionsMask">Action masks for the discrete action space. Each element in the list is for one branch of the actions. Can be null if there is no mask.</param>
         /// <returns>(means, vars). If the supervised learning model does not support variances, the second item can be null.</returns>
         ValueTuple<float[,], float[,]> EvaluateAction(float[,] vectorObservation, List<float[,,,]> visualObservation, List<float[,]> actionsMask = null);
+
+        /// <summary>
+        /// Train one batch for supervised learning.
+        /// </summary>
+        /// <param name="vectorObservations">Batched vector observations.</param>
+        /// <param name="visualObservations">List of batched visual observations.</param>
+        /// <param name="actions">Desired actions under the input states.</param>
+        /// <param name="actionsMask">Action masks for the discrete action space. Each element in the list is for one branch of the actions. Can be null if there is no mask.</param>
+        /// <returns></returns>
         float TrainBatch(float[,] vectorObservations, List<float[,,,]> visualObservations, float[,] actions, List<float[,]> actionsMask = null);
     }
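For reference, below is a minimal usage sketch of the ISupervisedLearningModel interface documented above. The wrapper class, batch size, observation shapes, and action dimension are illustrative assumptions and are not part of this commit; only the interface members come from the diff.

using System;
using System.Collections.Generic;

// Hypothetical example; everything except the ISupervisedLearningModel members is assumed.
public class SupervisedLearningExample
{
    public static void RunOneStep(ISupervisedLearningModel model)
    {
        // Assumed batch: 8 agents with 4-dimensional vector observations,
        // no visual observations and no action masks.
        float[,] vectorObs = new float[8, 4];
        var visualObs = new List<float[,,,]>();

        // Query the desired actions (means) and, if the model supports it,
        // their variances; the second item may be null.
        ValueTuple<float[,], float[,]> result = model.EvaluateAction(vectorObs, visualObs);
        float[,] means = result.Item1;
        float[,] vars = result.Item2;

        // Train on one batch of (state, desired action) pairs; TrainBatch
        // returns a single float metric for the batch (presumably the loss).
        float[,] desiredActions = new float[8, 2];
        float metric = model.TrainBatch(vectorObs, visualObs, desiredActions);
    }
}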

Assets/UnityTensorflow/Learning/PPO/RLModelPPO.cs

Lines changed: 52 additions & 0 deletions

@@ -28,14 +28,66 @@

     public interface IRLModelPPO
     {
+        /// <summary>
+        /// The entropy loss weight.
+        /// </summary>
         float EntropyLossWeight { get; set; }
+
+        /// <summary>
+        /// The value loss weight.
+        /// </summary>
         float ValueLossWeight { get; set; }
+
+        /// <summary>
+        /// The clip epsilon for the PPO actor loss.
+        /// </summary>
         float ClipEpsilon { get; set; }
+
+        /// <summary>
+        /// The clip range for the value loss.
+        /// </summary>
         float ClipValueLoss { get; set; }

+        /// <summary>
+        /// Evaluate the values of the current states.
+        /// </summary>
+        /// <param name="vectorObservation">Batched vector observations.</param>
+        /// <param name="visualObservation">List of batched visual observations.</param>
+        /// <returns>Values of the batched input states.</returns>
         float[] EvaluateValue(float[,] vectorObservation, List<float[,,,]> visualObservation);
+
+        /// <summary>
+        /// Evaluate the desired actions for the current states.
+        /// </summary>
+        /// <param name="vectorObservation">Batched vector observations.</param>
+        /// <param name="actionProbs">Output probabilities of the chosen actions. Used for PPO training.</param>
+        /// <param name="visualObservation">List of batched visual observations.</param>
+        /// <param name="actionsMask">Action masks for the discrete action space. Each element in the list is for one branch of the actions. Can be null if there is no mask.</param>
+        /// <returns>The desired actions for the batched input states.</returns>
         float[,] EvaluateAction(float[,] vectorObservation, out float[,] actionProbs, List<float[,,,]> visualObservation, List<float[,]> actionsMask = null);
+
+        /// <summary>
+        /// Evaluate the probabilities of the input actions under the current states.
+        /// </summary>
+        /// <param name="vectorObservation">Batched vector observations.</param>
+        /// <param name="actions">The batched actions whose probabilities are needed.</param>
+        /// <param name="visualObservation">List of batched visual observations.</param>
+        /// <param name="actionsMask">Action masks for the discrete action space. Each element in the list is for one branch of the actions. Can be null if there is no mask.</param>
+        /// <returns>Probabilities of the input actions. Used for PPO training.</returns>
         float[,] EvaluateProbability(float[,] vectorObservation, float[,] actions, List<float[,,,]> visualObservation, List<float[,]> actionsMask = null);
+
+        /// <summary>
+        /// Train one batch for PPO.
+        /// </summary>
+        /// <param name="vectorObservations">Batched vector observations.</param>
+        /// <param name="visualObservations">List of batched visual observations.</param>
+        /// <param name="actions">The old actions taken in the input states.</param>
+        /// <param name="actionProbs">The old probabilities of the actions taken in the input states.</param>
+        /// <param name="targetValues">Target values.</param>
+        /// <param name="oldValues">Old values of the input states, as previously evaluated by the neural network.</param>
+        /// <param name="advantages">Advantages.</param>
+        /// <param name="actionsMask">Action masks for the discrete action space. Each element in the list is for one branch of the actions. Can be null if there is no mask.</param>
+        /// <returns></returns>
         float[] TrainBatch(float[,] vectorObservations, List<float[,,,]> visualObservations, float[,] actions, float[,] actionProbs, float[] targetValues, float[] oldValues, float[] advantages, List<float[,]> actionsMask = null);
     }
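To show how these members fit together, here is a hypothetical sketch of a single PPO update using IRLModelPPO. The wrapper class, the hyperparameter values, and the way targetValues and advantages are obtained (normally computed from collected rewards, e.g. with GAE) are assumptions for illustration; only the IRLModelPPO members come from the diff above.

using System.Collections.Generic;

// Hypothetical example; everything except the IRLModelPPO members is assumed.
public class PPOTrainingExample
{
    public static void RunOneUpdate(IRLModelPPO model, float[,] vectorObs,
        float[] targetValues, float[] advantages)
    {
        var visualObs = new List<float[,,,]>();

        // Commonly used PPO settings (illustrative values only).
        model.EntropyLossWeight = 0.01f;
        model.ValueLossWeight = 0.5f;
        model.ClipEpsilon = 0.2f;
        model.ClipValueLoss = 0.2f;

        // Sample actions for the current states and keep their probabilities;
        // both are fed back in during the PPO update.
        float[,] actionProbs;
        float[,] actions = model.EvaluateAction(vectorObs, out actionProbs, visualObs);

        // Values of the same states, passed back in as the "old values".
        float[] oldValues = model.EvaluateValue(vectorObs, visualObs);

        // One training step on the collected batch; the returned array holds
        // the loss values reported by the model.
        float[] losses = model.TrainBatch(vectorObs, visualObs, actions,
            actionProbs, targetValues, oldValues, advantages);
    }
}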

(Third changed file: 42.4 KB, content not shown.)
