//! Trainer traits and implementations for reinforcement-learning agents.
//!
//! NOTE(review): the original top-of-file content here was garbled in
//! extraction (bare line numbers); replaced with a module doc stub —
//! restore the original header from version control if one existed.
mod qlearner;
mod cem;
mod lspi;
mod policygrad;
pub use self::qlearner::{QLearner, SARSALearner, DynaQ, FittedQIteration};
pub use self::cem::CrossEntropy;
pub use self::lspi::LSPolicyIteration;
pub use self::policygrad::PolicyGradient;
use environment::{Space, Environment, Transition};
use agent::Agent;
/// A trainer that updates an agent incrementally, one transition at a time.
///
/// Implementors define both a single-step update (`train_step`) and a full
/// training loop over an environment (`train`).
pub trait OnlineTrainer<S, A, T>
    where S: Space,
          A: Space,
          T: Agent<S, A>
{
    /// Applies one training update to `agent` from a single observed `transition`.
    fn train_step(&mut self, agent: &mut T, transition: Transition<S, A>);

    /// Runs the full training procedure for `agent` on `env`.
    fn train(&mut self, agent: &mut T, env: &mut Environment<State=S, Action=A>);
}
/// A trainer that updates an agent in episode-sized chunks.
///
/// Unlike [`OnlineTrainer`], the per-step method here takes the environment
/// itself rather than a pre-collected transition, so the implementor drives
/// interaction with `env` during each training step.
pub trait EpisodicTrainer<S, A, T>
    where S: Space,
          A: Space,
          T: Agent<S, A>
{
    /// Performs one unit of training for `agent`, interacting with `env` directly.
    fn train_step(&mut self, agent: &mut T, env: &mut Environment<State=S, Action=A>);

    /// Runs the full training procedure for `agent` on `env`.
    fn train(&mut self, agent: &mut T, env: &mut Environment<State=S, Action=A>);
}
/// A trainer that learns from a pre-collected batch of transitions
/// rather than by interacting with an environment.
pub trait BatchTrainer<S, A, T>
    where S: Space,
          A: Space,
          T: Agent<S, A>
{
    /// Trains `agent` on the supplied batch of `transitions`.
    fn train(&mut self, agent: &mut T, transitions: Vec<Transition<S, A>>);
}