Struct renforce::trainer::DynaQ
pub struct DynaQ<S: Space, A: FiniteSpace, M: Model<S, A>> where S::Element: Hash + Eq,
A::Element: Hash + Eq { /* fields omitted */ }
Represents an OnlineTrainer for Q-functions.
Uses the Dyna-Q algorithm.
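In the Dyna-Q algorithm, each observed transition (s, a, r, s') drives a standard Q-learning update, Q(s, a) ← Q(s, a) + alpha * (r + gamma * max over a' of Q(s', a') − Q(s, a)), and is also recorded in the model; the model is then sampled to apply the same update to simulated transitions. Here gamma is the discount factor, alpha the learning rate, and num_samples controls how many simulated transitions are replayed per step.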
Methods
impl<S: Space, A: FiniteSpace, M: Model<S, A>> DynaQ<S, A, M> where S::Element: Hash + Eq,
A::Element: Hash + Eq
fn new(action_space: A,
       gamma: f64,
       alpha: f64,
       train_period: TimePeriod,
       num_samples: usize,
       model: M)
       -> DynaQ<S, A, M>
Returns a new DynaQ with the given parameters
fn default(action_space: A, model: M) -> DynaQ<S, A, M>
Creates a new DynaQ with default parameters
fn gamma(self, gamma: f64) -> DynaQ<S, A, M>
Sets the gamma field of self
fn alpha(self, alpha: f64) -> DynaQ<S, A, M>
Sets the alpha field of self
fn train_period(self, train_period: TimePeriod) -> DynaQ<S, A, M>
Sets the train_period field of self
fn num_samples(self, num_samples: usize) -> DynaQ<S, A, M>
Sets the num_samples field of self; see the construction sketch below
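A minimal construction sketch using only the methods documented above. Here action_space and model are assumed to be values of types implementing FiniteSpace and Model, constructed elsewhere in the crate; the hyperparameter values are illustrative, not recommendations:

    // Start from the default configuration, then override selected
    // hyperparameters with the builder-style setters (each consumes
    // self and returns the updated DynaQ, so calls can be chained).
    let mut trainer = DynaQ::default(action_space, model)
        .gamma(0.95)       // discount factor
        .alpha(0.1)        // learning rate
        .num_samples(10);  // simulated transitions replayed per step (assumed meaning)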
Trait Implementations
impl<S: Debug + Space, A: Debug + FiniteSpace, M: Debug + Model<S, A>> Debug for DynaQ<S, A, M> where S::Element: Hash + Eq,
A::Element: Hash + Eq,
A::Element: Debug,
S::Element: Debug
impl<T, S: Space, A: FiniteSpace, M: Model<S, A>> OnlineTrainer<S, A, T> for DynaQ<S, A, M> where T: QFunction<S, A> + Agent<S, A>,
S::Element: Hash + Eq,
A::Element: Hash + Eq
fn train_step(&mut self, agent: &mut T, transition: Transition<S, A>)
Performs one training iteration using the given transition
fn train(&mut self, agent: &mut T, env: &mut Environment<State=S, Action=A>)
Automatically trains the agent to perform well in the environment
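A hedged usage sketch, assuming agent implements both QFunction<S, A> and Agent<S, A>, env is an Environment with matching State and Action types, and trainer was built as sketched above (all constructed elsewhere):

    // Run the agent in the environment, applying Dyna-Q updates
    // for the configured train_period.
    trainer.train(&mut agent, &mut env);

For manual control over data collection, train_step can instead be called with a single Transition<S, A> gathered by the caller.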