51#ifndef GUM_STRUCTURED_PLANNING_H
52#define GUM_STRUCTURED_PLANNING_H
81 template <
typename GUM_SCALAR >
93 GUM_SCALAR epsilon = 0.00001,
94 bool verbose =
true) {
105 GUM_SCALAR epsilon = 0.00001,
106 bool verbose =
true) {
125 GUM_SCALAR discountFactor,
315 optimalValueFunction);
Headers of the Planning Strategy interface.
A class to store the optimal actions.
Class to handle efficiently argMaxSet.
This class is used to implement factored decision process.
<agrum/FMDP/SDyna/IOperatorStrategy.h>
<agrum/FMDP/SDyna/IPlanningStrategy.h>
<agrum/FMDP/planning/mddOperatorStrategy.h>
Class implementing a function graph.
Implementation of a Terminal Node Policy that maps a node id to a set of values.
virtual void evalPolicy_()
Perform the required tasks to extract an optimal policy.
NodeId _recurExtractOptPol_(NodeId, const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
GUM_SCALAR discountFactor_
Discount Factor used for infinite horizon planning.
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)
Initializes data structure needed for making the planning.
NodeId _recurArgMaxCopy_(NodeId, Idx, const MultiDimFunctionGraph< GUM_SCALAR > *, MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
bool verbose_
Boolean used to indicate whether or not iteration information should be displayed on the terminal.
virtual ~StructuredPlaner()
Default destructor.
virtual void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
static StructuredPlaner< GUM_SCALAR > * sviInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
void _transferActionIds_(const ArgMaxSet< GUM_SCALAR, Idx > &, ActionSet &)
Extract from an ArgMaxSet the associated ActionSet.
virtual Size vFunctionSize()
Returns the current size of the vFunction computed so far.
virtual MultiDimFunctionGraph< GUM_SCALAR > * minimiseFunctions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs min_i F_i.
virtual MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * argmaximiseQactions_(std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * > &)
Performs argmax_a Q(s,a).
void extractOptimalPolicy_(const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *optimalValueFunction)
From V(s)* = argmax_a Q*(s,a), this function extracts pi*(s). This function mainly consists in extracti...
MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * optimalPolicy_
The associated optimal policy.
virtual MultiDimFunctionGraph< GUM_SCALAR > * addReward_(MultiDimFunctionGraph< GUM_SCALAR > *function, Idx actionId=0)
Perform the R(s) + gamma . function.
IOperatorStrategy< GUM_SCALAR > * operator_
const FMDP< GUM_SCALAR > * fmdp_
The Factored Markov Decision Process describing our planning situation (NB : this one must have funct...
std::string optimalPolicy2String()
Provide a better toDot for the optimal policy where the leaves have the action name instead of its id...
virtual Size optimalPolicySize()
Returns the current size of the optimalPolicy computed so far.
virtual MultiDimFunctionGraph< GUM_SCALAR > * valueIteration_()
Performs a single step of value iteration.
INLINE const MultiDimFunctionGraph< GUM_SCALAR > * vFunction()
Returns a const ptr on the value function computed so far.
GUM_SCALAR _threshold_
The threshold value. Whenever | V^{n} - V^{n+1} | < threshold, we consider that V ~ V*.
gum::VariableSet elVarSeq_
A Set used to eliminate primed variables.
StructuredPlaner(IOperatorStrategy< GUM_SCALAR > *opi, GUM_SCALAR discountFactor, GUM_SCALAR epsilon, bool verbose)
Default constructor.
virtual void initVFunction_()
Performs a single step of value iteration.
INLINE const FMDP< GUM_SCALAR > * fmdp()
Returns a const ptr on the Factored Markov Decision Process on which we're planning.
MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * makeArgMax_(const MultiDimFunctionGraph< GUM_SCALAR > *Qaction, Idx actionId)
Creates a copy of the given Qaction that can be exploited by an argmax.
static StructuredPlaner< GUM_SCALAR > * spumddInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
virtual MultiDimFunctionGraph< GUM_SCALAR > * maximiseQactions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs max_a Q(s,a).
MultiDimFunctionGraph< GUM_SCALAR > * vFunction_
The Value Function computed iteratively.
INLINE MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * optimalPolicy()
Returns the best policy obtained so far.
virtual MultiDimFunctionGraph< GUM_SCALAR > * evalQaction_(const MultiDimFunctionGraph< GUM_SCALAR > *, Idx)
Performs the P(s'|s,a).V^{t-1}(s') part of the value iteration.
<agrum/FMDP/planning/treeOperatorStrategy.h>
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Size Idx
Type for indexes.
Size NodeId
Type for node ids.
Headers of the MDDOperatorStrategy planer class.
gum is the global namespace for all aGrUM entities
Set< const DiscreteVariable * > VariableSet
Template implementation of FMDP/planning/StructuredPlaner.h classes.
Headers of the TreeOperatorStrategy planer class.