Struct renforce::agent::PolicyAgent [−] [src]

pub struct PolicyAgent<F: Float, S: Space, A: FiniteSpace, D: DifferentiableFunc<S, A, F>> {
    pub log_func: D,
    // some fields omitted
}

Policy Agent

Explicitly stores a stochastic policy as the softmax of the output of a differentiable function (`log_func`).

Fields

log_func: D

The function used by this agent to calculate weights passed into Softmax

Methods

`impl<S: Space, A: FiniteSpace, D: DifferentiableFunc<S, A, f64>> PolicyAgent<f64, S, A, D>`
[src]

`fn default(action_space: A, log_func: D) -> PolicyAgent<f64, S, A, D>`

Creates a new PolicyAgent that uses a softmax temperature of 1.0

`impl<F: Float, S: Space, A: FiniteSpace, D: DifferentiableFunc<S, A, F>> PolicyAgent<F, S, A, D>`
[src]

`fn new(action_space: A, log_func: D, temp: F) -> PolicyAgent<F, S, A, D>`

Creates a new PolicyAgent with given parameters

`fn temp(self, temp: F) -> PolicyAgent<F, S, A, D>`

Sets the softmax temperature to `temp`, consuming `self` and returning the updated agent

`fn get_temp(&self) -> F`

Returns temperature used by agent

`fn calc_log_grad(&self, state: &S::Element, action: &A::Element) -> Vec<F>`

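Calculates the gradient of the log of this function's output for the given state and action (presumably with respect to the function's parameters, as for `log_grad`)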

Trait Implementations

`impl<F: Debug + Float, S: Debug + Space, A: Debug + FiniteSpace, D: Debug + DifferentiableFunc<S, A, F>> Debug for PolicyAgent<F, S, A, D> where A::Element: Debug`
[src]

`fn fmt(&self, __arg_0: &mut Formatter) -> Result`

Formats the value using the given formatter.

`impl<F: Clone + Float, S: Clone + Space, A: Clone + FiniteSpace, D: Clone + DifferentiableFunc<S, A, F>> Clone for PolicyAgent<F, S, A, D> where A::Element: Clone`
[src]

`fn clone(&self) -> PolicyAgent<F, S, A, D>`

Returns a copy of the value. Read more

`fn clone_from(&mut self, source: &Self)`
1.0.0

Performs copy-assignment from source. Read more

`impl<F: Float, S: Space, A: FiniteSpace, D> ParameterizedFunc<F> for PolicyAgent<F, S, A, D> where D: DifferentiableFunc<S, A, F>`
[src]

`fn num_params(&self) -> usize`

Returns number of parameters used by the function

`fn get_params(&self) -> Vec<F>`

Returns the parameters used by the function

`fn set_params(&mut self, params: Vec<F>)`

Changes the parameters used by the function

`impl<F: Float, S: Space, A: FiniteSpace, D> LogDiffFunc<S, A, F> for PolicyAgent<F, S, A, D> where D: DifferentiableFunc<S, A, F>`
[src]

`fn log_grad(&self, state: &S::Element, action: &A::Element) -> Vec<F>`

The gradient of the log of the output with respect to the parameters

`impl<F: Float, S: Space, A: FiniteSpace, D> Agent<S, A> for PolicyAgent<F, S, A, D> where D: DifferentiableFunc<S, A, F>`
[src]

`fn get_action(&self, state: &S::Element) -> A::Element`

Returns the action the agent should perform in the given state

Struct renforce::agent::PolicyAgent [−] [src]

Fields

Methods

impl<S: Space, A: FiniteSpace, D: DifferentiableFunc<S, A, f64>> PolicyAgent<f64, S, A, D>[src]

fn default(action_space: A, log_func: D) -> PolicyAgent<f64, S, A, D>

impl<F: Float, S: Space, A: FiniteSpace, D: DifferentiableFunc<S, A, F>> PolicyAgent<F, S, A, D>[src]

fn new(action_space: A, log_func: D, temp: F) -> PolicyAgent<F, S, A, D>

fn temp(self, temp: F) -> PolicyAgent<F, S, A, D>

fn get_temp(&self) -> F

fn calc_log_grad(&self, state: &S::Element, action: &A::Element) -> Vec<F>

Trait Implementations

impl<F: Debug + Float, S: Debug + Space, A: Debug + FiniteSpace, D: Debug + DifferentiableFunc<S, A, F>> Debug for PolicyAgent<F, S, A, D> where A::Element: Debug[src]

fn fmt(&self, __arg_0: &mut Formatter) -> Result

impl<F: Clone + Float, S: Clone + Space, A: Clone + FiniteSpace, D: Clone + DifferentiableFunc<S, A, F>> Clone for PolicyAgent<F, S, A, D> where A::Element: Clone[src]

fn clone(&self) -> PolicyAgent<F, S, A, D>

fn clone_from(&mut self, source: &Self)1.0.0

impl<F: Float, S: Space, A: FiniteSpace, D> ParameterizedFunc<F> for PolicyAgent<F, S, A, D> where D: DifferentiableFunc<S, A, F>[src]

fn num_params(&self) -> usize

fn get_params(&self) -> Vec<F>

fn set_params(&mut self, params: Vec<F>)

impl<F: Float, S: Space, A: FiniteSpace, D> LogDiffFunc<S, A, F> for PolicyAgent<F, S, A, D> where D: DifferentiableFunc<S, A, F>[src]

fn log_grad(&self, state: &S::Element, action: &A::Element) -> Vec<F>

impl<F: Float, S: Space, A: FiniteSpace, D> Agent<S, A> for PolicyAgent<F, S, A, D> where D: DifferentiableFunc<S, A, F>[src]

fn get_action(&self, state: &S::Element) -> A::Element