69#define RECAST(x) reinterpret_cast< const MultiDimFunctionGraph< GUM_SCALAR >* >(x)
85 template <
typename GUM_SCALAR >
87 GUM_SCALAR discountFactor,
101 template <
typename GUM_SCALAR >
123 template <
typename GUM_SCALAR >
133 std::stringstream output;
134 std::stringstream terminalStream;
135 std::stringstream nonTerminalStream;
136 std::stringstream arcstream;
139 output << std::endl <<
"digraph \" OPTIMAL POLICY \" {" << std::endl;
142 terminalStream <<
"node [shape = box];" << std::endl;
143 nonTerminalStream <<
"node [shape = ellipse];" << std::endl;
146 std::string tab =
"\t";
152 std::queue< NodeId > fifo;
161 while (!fifo.empty()) {
163 NodeId currentNodeId = fifo.front();
172 terminalStream << tab << currentNodeId <<
";" << tab << currentNodeId <<
" [label=\""
173 << currentNodeId <<
" - ";
178 terminalStream <<
fmdp_->actionName(*valIter) <<
" ";
181 terminalStream <<
"\"];" << std::endl;
188 const InternalNode* currentNode = optimalPolicy_->node(currentNodeId);
191 nonTerminalStream << tab << currentNodeId <<
";" << tab << currentNodeId <<
" [label=\""
192 << currentNodeId <<
" - " << currentNode->
nodeVar()->
name() <<
"\"];"
197 for (
Idx sonIter = 0; sonIter < currentNode->
nbSons(); ++sonIter) {
198 if (!visited.
exists(currentNode->
son(sonIter))) {
199 fifo.push(currentNode->
son(sonIter));
200 visited << currentNode->
son(sonIter);
202 if (!sonMap.
exists(currentNode->
son(sonIter)))
204 sonMap[currentNode->
son(sonIter)]->addLink(sonIter);
208 for (
auto sonIter = sonMap.
beginSafe(); sonIter != sonMap.
endSafe(); ++sonIter) {
209 arcstream << tab << currentNodeId <<
" -> " << sonIter.key() <<
" [label=\" ";
213 if (modaIter->
nextLink()) arcstream <<
", ";
216 arcstream <<
"\",color=\"#00ff00\"];" << std::endl;
217 delete sonIter.val();
223 output << terminalStream.str() << std::endl
224 << nonTerminalStream.str() << std::endl
225 << arcstream.str() << std::endl
242 template <
typename GUM_SCALAR >
250 for (
auto varIter =
fmdp_->beginVariables(); varIter !=
fmdp_->endVariables(); ++varIter)
262 template <
typename GUM_SCALAR >
285 if (gap < fabs(deltaV->
value())) gap = fabs(deltaV->
value());
289 std::cout <<
" ------------------- Fin itération n° " << nbIte << std::endl
290 <<
" Gap : " << gap <<
" - " <<
_threshold_ << std::endl;
307 template <
typename GUM_SCALAR >
324 template <
typename GUM_SCALAR >
333 std::vector< MultiDimFunctionGraph< GUM_SCALAR >* > qActionsSet;
334 for (
auto actionIter =
fmdp_->beginActions(); actionIter !=
fmdp_->endActions(); ++actionIter) {
336 qActionsSet.push_back(qAction);
343 newVFunction = this->maximiseQactions_(qActionsSet);
347 newVFunction = this->addReward_(newVFunction);
355 template <
typename GUM_SCALAR >
356 MultiDimFunctionGraph< GUM_SCALAR >*
370 template <
typename GUM_SCALAR >
374 qActionsSet.pop_back();
376 while (!qActionsSet.empty()) {
378 qActionsSet.pop_back();
379 newVFunction =
operator_->maximize(newVFunction, qAction);
388 template <
typename GUM_SCALAR >
392 qActionsSet.pop_back();
394 while (!qActionsSet.empty()) {
396 qActionsSet.pop_back();
397 newVFunction =
operator_->minimize(newVFunction, qAction);
406 template <
typename GUM_SCALAR >
434 template <
typename GUM_SCALAR >
445 for (
auto actionIter =
fmdp_->beginActions(); actionIter !=
fmdp_->endActions(); ++actionIter) {
450 argMaxQActionsSet.push_back(
makeArgMax_(qAction, *actionIter));
472 template <
typename GUM_SCALAR >
478 =
operator_->getArgMaxFunctionInstance();
485 amcpy->
add(**varIter);
498 template <
typename GUM_SCALAR >
505 if (visitedNodes.
exists(currentNodeId))
return visitedNodes[currentNodeId];
510 nody = argMaxCpy->manager()->addTerminalNode(leaf);
515 for (
Idx moda = 0; moda < currentNode->
nodeVar()->domainSize(); ++moda)
518 nody = argMaxCpy->manager()->addInternalNode(currentNode->
nodeVar(), sonsMap);
520 visitedNodes.
insert(currentNodeId, nody);
527 template <
typename GUM_SCALAR >
533 = qActionsSet.back();
534 qActionsSet.pop_back();
536 while (!qActionsSet.empty()) {
538 = qActionsSet.back();
539 qActionsSet.pop_back();
540 newVFunction =
operator_->argmaximize(newVFunction, qAction);
551 template <
typename GUM_SCALAR >
554 argMaxOptimalValueFunction) {
559 = argMaxOptimalValueFunction->variablesSequence().beginSafe();
560 varIter != argMaxOptimalValueFunction->variablesSequence().endSafe();
566 argMaxOptimalValueFunction,
569 delete argMaxOptimalValueFunction;
575 template <
typename GUM_SCALAR >
581 if (visitedNodes.
exists(currentNodeId))
return visitedNodes[currentNodeId];
584 if (argMaxOptVFunc->isTerminalNode(currentNodeId)) {
589 const InternalNode* currentNode = argMaxOptVFunc->node(currentNodeId);
592 for (
Idx moda = 0; moda < currentNode->
nodeVar()->domainSize(); ++moda)
596 visitedNodes.
insert(currentNodeId, nody);
603 template <
typename GUM_SCALAR >
A class to store the optimal actions.
SequenceIteratorSafe< Idx > endSafe() const
Iterator end.
SequenceIteratorSafe< Idx > beginSafe() const
Iterator beginning.
Class to handle efficiently argMaxSet.
SequenceIteratorSafe< GUM_SCALAR_SEQ > beginSafe() const
Iterator beginning.
SequenceIteratorSafe< GUM_SCALAR_SEQ > endSafe() const
Iterator end.
virtual std::string label(Idx i) const =0
Returns the i-th label. This method is pure virtual.
virtual Size domainSize() const =0
void nextValue() const
Increments the constant safe iterator.
void beginValues() const
Initializes the constant safe iterator on terminal nodes.
bool hasValue() const
Indicates if the constant safe iterator has reached the end of the terminal nodes list.
const GUM_SCALAR & value() const
Returns the value of the current terminal node pointed to by the constant safe iterator.
This class is used to implement factored decision process.
The class for generic Hash Tables.
const iterator_safe & endSafe() noexcept
Returns the safe iterator pointing to the end of the hashtable.
bool exists(const Key &key) const
Checks whether there exists an element with a given key in the hashtable.
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
iterator_safe beginSafe()
Returns the safe iterator pointing to the beginning of the hashtable.
<agrum/FMDP/SDyna/IOperatorStrategy.h>
Structure used to represent a node internal structure.
const DiscreteVariable * nodeVar() const
Returns the node variable.
Idx nbSons() const
Returns the number of sons.
NodeId son(Idx modality) const
Returns the son at a given index.
Link of a chain list allocated using the SmallObjectAllocator.
const Link< T > * nextLink() const
Returns next link.
const T & element() const
Returns the element stored in this link.
Chain list allocated using the SmallObjectAllocator.
Class implementing a function graph.
MultiDimFunctionGraphManager< GUM_SCALAR, TerminalNodePolicy > * manager()
Returns a pointer to the manager of this diagram.
const NodeId & root() const
Returns the id of the root node from the diagram.
bool isTerminalNode(const NodeId &node) const
Indicates if given node is terminal or not.
const InternalNode * node(NodeId n) const
Returns internalNode structure associated to that nodeId.
const GUM_SCALAR & nodeValue(NodeId n) const
Returns value associated to given node.
void copyAndMultiplyByScalar(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, GUM_SCALAR gamma)
Copies src diagrams and multiply every value by the given scalar.
void copyAndReassign(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, const Bijection< const DiscreteVariable *, const DiscreteVariable * > &reassign)
Copies the structure of the src diagram into this diagram.
virtual void add(const DiscreteVariable &v)
Adds a new var to the variables of the multidimensional matrix.
virtual const Sequence< const DiscreteVariable * > & variablesSequence() const override
Returns a const ref to the sequence of DiscreteVariable*.
Implementation of a Terminal Node Policy that maps nodeid to a set of value.
bool exists(const Key &k) const
Indicates whether a given element belongs to the set.
virtual void evalPolicy_()
Perform the required tasks to extract an optimal policy.
NodeId _recurExtractOptPol_(NodeId, const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
GUM_SCALAR discountFactor_
Discount Factor used for infinite horizon planning.
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)
Initializes data structure needed for making the planning.
NodeId _recurArgMaxCopy_(NodeId, Idx, const MultiDimFunctionGraph< GUM_SCALAR > *, MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
bool verbose_
Boolean used to indicate whether or not iteration information should be displayed on the terminal.
virtual ~StructuredPlaner()
Default destructor.
virtual void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
void _transferActionIds_(const ArgMaxSet< GUM_SCALAR, Idx > &, ActionSet &)
Extract from an ArgMaxSet the associated ActionSet.
virtual MultiDimFunctionGraph< GUM_SCALAR > * minimiseFunctions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs min_i F_i.
virtual MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * argmaximiseQactions_(std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * > &)
Performs argmax_a Q(s,a).
void extractOptimalPolicy_(const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *optimalValueFunction)
From V(s)* = argmax_a Q*(s,a), this function extract pi*(s) This function mainly consists in extracti...
MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * optimalPolicy_
The associated optimal policy.
virtual MultiDimFunctionGraph< GUM_SCALAR > * addReward_(MultiDimFunctionGraph< GUM_SCALAR > *function, Idx actionId=0)
Perform the R(s) + gamma . function.
IOperatorStrategy< GUM_SCALAR > * operator_
const FMDP< double > * fmdp_
std::string optimalPolicy2String()
Provide a better toDot for the optimal policy where the leaves have the action name instead of its id...
virtual MultiDimFunctionGraph< double > * valueIteration_()
GUM_SCALAR _threshold_
The threshold value. Whenever | V^{n} - V^{n+1} | < threshold, we consider that V ~ V*.
gum::VariableSet elVarSeq_
StructuredPlaner(IOperatorStrategy< GUM_SCALAR > *opi, GUM_SCALAR discountFactor, GUM_SCALAR epsilon, bool verbose)
Default constructor.
virtual void initVFunction_()
Performs a single step of value iteration.
INLINE const FMDP< GUM_SCALAR > * fmdp()
Returns a const ptr on the Factored Markov Decision Process on which we're planning.
MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * makeArgMax_(const MultiDimFunctionGraph< GUM_SCALAR > *Qaction, Idx actionId)
Creates a copy of the given Qaction that can be exploited by an argmax.
virtual MultiDimFunctionGraph< GUM_SCALAR > * maximiseQactions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs max_a Q(s,a).
MultiDimFunctionGraph< GUM_SCALAR > * vFunction_
The Value Function computed iteratively.
virtual MultiDimFunctionGraph< double > * evalQaction_(const MultiDimFunctionGraph< double > *, Idx)
const std::string & name() const
returns the name of the variable
This file contains several function objects that are not (yet) defined in the STL.
Size Idx
Type for indexes.
Size NodeId
Type for node ids.
Header files of gum::Instantiation.
Headers of MultiDimFunctionGraph.
gum is the global namespace for all aGrUM entities
Headers of the StructuredPlaner planer class.
Header of the Tensor class.