69#define RECASTED(x) reinterpret_cast< const MultiDimFunctionGraph< double >* >(x)
85 double discountFactor,
121 for (
auto actionIter =
fmdp->beginActions(); actionIter !=
fmdp->endActions(); ++actionIter) {
153 for (
auto actionIter =
fmdp_->beginActions(); actionIter !=
fmdp_->endActions(); ++actionIter)
168 std::vector< MultiDimFunctionGraph< double >* > qActionsSet;
169 for (
auto actionIter =
fmdp_->beginActions(); actionIter !=
fmdp_->endActions(); ++actionIter) {
181 qActionsSet.push_back(qAction);
214 for (
auto actionIter =
fmdp_->beginActions(); actionIter !=
fmdp_->endActions(); ++actionIter) {
217 qAction = this->
addReward_(qAction, *actionIter);
224 argMaxQActionsSet.push_back(
makeArgMax_(qAction, *actionIter));
247 for (
auto actionIter = this->
fmdp()->beginActions(); actionIter != this->
fmdp()->
endActions();
249 std::vector< MultiDimFunctionGraph< double >* > rmaxs;
250 std::vector< MultiDimFunctionGraph< double >* > boolQs;
262 std::pair< NodeId, NodeId > rooty
271 rmaxs.push_back(varRMax);
272 boolQs.push_back(varBoolQ);
308 std::pair< NodeId, NodeId >
313 std::pair< NodeId, NodeId > rep;
322 auto rmaxsons =
static_cast< NodeId*
>(
324 auto bqsons =
static_cast< NodeId*
>(
327 for (
Idx moda = 0; moda < visited->
nodeVar(currentNodeId)->domainSize(); ++moda) {
328 std::pair< NodeId, NodeId > sonp
330 rmaxsons[moda] = sonp.first;
331 bqsons[moda] = sonp.second;
343 for (
auto actionIter = this->
fmdp()->beginActions(); actionIter != this->
fmdp()->
endActions();
#define RECASTED(x)
Only for the purpose of shorter lines and hence more comprehensible code.
Headers of the RMax planer class.
Safe Iterators for hashtables.
HashTable< Idx, bool > _initializedTable_
const ILearningStrategy * _fmdpLearner_
HashTable< Idx, MultiDimFunctionGraph< double > * > _actionsBoolTable_
HashTable< Idx, StatesCounter * > _counterTable_
void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
void _makeRMaxFunctionGraphs_()
virtual void initVFunction_()
Performs a single step of value iteration.
virtual void evalPolicy_()
Perform the required tasks to extract an optimal policy.
virtual MultiDimFunctionGraph< double > * valueIteration_()
Performs a single step of value iteration.
void initialize(const FMDP< double > *fmdp)
Initializes data structure needed for making the planning.
AdaptiveRMaxPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, const ILearningStrategy *learner, bool verbose)
Default constructor.
~AdaptiveRMaxPlaner()
Default destructor.
HashTable< Idx, MultiDimFunctionGraph< double > * > _actionsRMaxTable_
std::pair< NodeId, NodeId > _visitLearner_(const IVisitableGraphLearner *, NodeId currentNodeId, MultiDimFunctionGraph< double > *, MultiDimFunctionGraph< double > *)
virtual Size domainSize() const =0
SequenceIteratorSafe< const DiscreteVariable * > endVariables() const
Returns an iterator reference to the end of the list of variables.
SequenceIteratorSafe< Idx > endActions() const
Returns an iterator reference to the end of the list of actions.
<agrum/FMDP/SDyna/IDecisionStrategy.h>
virtual void initialize(const FMDP< double > *fmdp)
Initializes the learner.
<agrum/FMDP/SDyna/ILearningStrategy.h>
<agrum/FMDP/SDyna/IVisitableGraphLearner.h>
virtual const DiscreteVariable * nodeVar(NodeId ni) const =0
virtual NodeId root() const =0
virtual bool isTerminal(NodeId ni) const =0
virtual void insertSetOfVars(MultiDimFunctionGraph< double > *) const =0
virtual NodeId nodeSon(NodeId ni, Idx modality) const =0
virtual Idx nodeNbObservation(NodeId ni) const =0
void clean()
Removes variables that have no nodes in the diagram.
void setRootNode(const NodeId &root)
Sets root node of decision diagram.
NodeId addTerminalNode(const GUM_SCALAR &value)
Adds a value to the MultiDimFunctionGraph.
virtual void reduce()=0
Ensures that all isomorphic subgraphs are merged together.
NodeId addInternalNode(const DiscreteVariable *var)
Inserts a new non terminal node in graph.
MultiDimFunctionGraphManager< GUM_SCALAR, TerminalNodePolicy > * manager()
Returns a pointer to the manager of this diagram.
void copyAndReassign(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, const Bijection< const DiscreteVariable *, const DiscreteVariable * > &reassign)
Copies the structure of the src diagram into this diagram.
Implementation of a Terminal Node Policy that maps a node id to a set of values.
<agrum/FMDP/simulation/statesCounter.h>
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)
Initializes data structure needed for making the planning.
virtual void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
virtual MultiDimFunctionGraph< double > * minimiseFunctions_(std::vector< MultiDimFunctionGraph< double > * > &)
virtual MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > * argmaximiseQactions_(std::vector< MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > * > &)
void extractOptimalPolicy_(const MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > *optimalValueFunction)
virtual MultiDimFunctionGraph< double > * addReward_(MultiDimFunctionGraph< double > *function, Idx actionId=0)
IOperatorStrategy< double > * operator_
const FMDP< double > * fmdp_
StructuredPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, bool verbose)
INLINE const FMDP< double > * fmdp()
MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > * makeArgMax_(const MultiDimFunctionGraph< double > *Qaction, Idx actionId)
virtual MultiDimFunctionGraph< double > * maximiseQactions_(std::vector< MultiDimFunctionGraph< double > * > &)
MultiDimFunctionGraph< double > * vFunction_
virtual MultiDimFunctionGraph< double > * evalQaction_(const MultiDimFunctionGraph< double > *, Idx)
This file contains several function objects that are not (yet) defined in the STL.
Size Idx
Type for indexes.
Size NodeId
Type for node ids.
Header files of gum::Instantiation.
Headers of MultiDimFunctionGraph.
gum is the global namespace for all aGrUM entities
Headers of gum::SmallObjectAllocator.
Header of the Tensor class.