Struct renforce::agent::PolicyAgent
[−]
[src]
pub struct PolicyAgent<F: Float, S: Space, A: FiniteSpace, D: DifferentiableFunc<S, A, F>> { pub log_func: D, // some fields omitted }
Policy Agent
Explicitly stores a stochastic policy as the softmax of some differentiable function
Fields
log_func: D
The function used by this agent to calculate weights passed into Softmax
Methods
impl<S: Space, A: FiniteSpace, D: DifferentiableFunc<S, A, f64>> PolicyAgent<f64, S, A, D>
[src]
fn default(action_space: A, log_func: D) -> PolicyAgent<f64, S, A, D>
Creates a new PolicyAgent that uses a temperature of 1.0 in its softmax
impl<F: Float, S: Space, A: FiniteSpace, D: DifferentiableFunc<S, A, F>> PolicyAgent<F, S, A, D>
[src]
fn new(action_space: A, log_func: D, temp: F) -> PolicyAgent<F, S, A, D>
Creates a new PolicyAgent with given parameters
fn temp(self, temp: F) -> PolicyAgent<F, S, A, D>
Updates the temp field of self and returns the modified agent
fn get_temp(&self) -> F
Returns temperature used by agent
fn calc_log_grad(&self, state: &S::Element, action: &A::Element) -> Vec<F>
Calculates the gradient of the log of this function with respect to its parameters
Trait Implementations
impl<F: Debug + Float, S: Debug + Space, A: Debug + FiniteSpace, D: Debug + DifferentiableFunc<S, A, F>> Debug for PolicyAgent<F, S, A, D> where A::Element: Debug
[src]
impl<F: Clone + Float, S: Clone + Space, A: Clone + FiniteSpace, D: Clone + DifferentiableFunc<S, A, F>> Clone for PolicyAgent<F, S, A, D> where A::Element: Clone
[src]
fn clone(&self) -> PolicyAgent<F, S, A, D>
Returns a copy of the value. Read more
fn clone_from(&mut self, source: &Self)
1.0.0
Performs copy-assignment from source. Read more
impl<F: Float, S: Space, A: FiniteSpace, D> ParameterizedFunc<F> for PolicyAgent<F, S, A, D> where D: DifferentiableFunc<S, A, F>
[src]
fn num_params(&self) -> usize
Returns number of parameters used by the function
fn get_params(&self) -> Vec<F>
Returns the parameters used by the function
fn set_params(&mut self, params: Vec<F>)
Changes the parameters used by the function
impl<F: Float, S: Space, A: FiniteSpace, D> LogDiffFunc<S, A, F> for PolicyAgent<F, S, A, D> where D: DifferentiableFunc<S, A, F>
[src]
fn log_grad(&self, state: &S::Element, action: &A::Element) -> Vec<F>
The gradient of the log of the output with respect to the parameters
impl<F: Float, S: Space, A: FiniteSpace, D> Agent<S, A> for PolicyAgent<F, S, A, D> where D: DifferentiableFunc<S, A, F>
[src]
fn get_action(&self, state: &S::Element) -> A::Element
Returns the action the agent should perform in the given state