aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
structuredPlaner_tpl.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40#pragma once
41
42
50
51// =========================================================================
52#include <queue>
53#include <vector>
54// #include <algorithm>
55// #include <utility>
56// =========================================================================
58
60// =========================================================================
64// =========================================================================
66// =========================================================================
67
69#define RECAST(x) reinterpret_cast< const MultiDimFunctionGraph< GUM_SCALAR >* >(x)
70
71namespace gum {
72
73
74 /* **************************************************************************************************
75 * **/
76 /* ** **/
77 /* ** Constructors / Destructors **/
78 /* ** **/
79 /* **************************************************************************************************
80 * **/
81
82 // ===========================================================================
83 // Default constructor
84 // ===========================================================================
85 template < typename GUM_SCALAR >
87 GUM_SCALAR discountFactor,
88 GUM_SCALAR epsilon,
89 bool verbose) :
90 discountFactor_(discountFactor), operator_(opi), verbose_(verbose) {
91 GUM_CONSTRUCTOR(StructuredPlaner);
92
93 _threshold_ = epsilon;
94 vFunction_ = nullptr;
95 optimalPolicy_ = nullptr;
96 }
97
98 // ===========================================================================
99 // Default destructor
100 // ===========================================================================
101 template < typename GUM_SCALAR >
103 GUM_DESTRUCTOR(StructuredPlaner);
104
105 if (vFunction_) { delete vFunction_; }
106
107 if (optimalPolicy_) delete optimalPolicy_;
108
109 delete operator_;
110 }
111
112 /* **************************************************************************************************
113 * **/
114 /* ** **/
115 /* ** Datastructure access methods **/
116 /* ** **/
117 /* **************************************************************************************************
118 * **/
119
120 // ===========================================================================
121 // Initializes data structure needed for making the planning
122 // ===========================================================================
123 template < typename GUM_SCALAR >
125 // ************************************************************************
126 // Discarding the case where no \pi* have been computed
127 if (!optimalPolicy_ || optimalPolicy_->root() == 0) return "NO OPTIMAL POLICY CALCULATED YET";
128
129 // ************************************************************************
130 // Initialisation
131
132 // Declaration of the needed string stream
133 std::stringstream output;
134 std::stringstream terminalStream;
135 std::stringstream nonTerminalStream;
136 std::stringstream arcstream;
137
138 // First line for the toDot
139 output << std::endl << "digraph \" OPTIMAL POLICY \" {" << std::endl;
140
141 // Form line for the internal node stream en the terminal node stream
142 terminalStream << "node [shape = box];" << std::endl;
143 nonTerminalStream << "node [shape = ellipse];" << std::endl;
144
145 // For somme clarity in the final string
146 std::string tab = "\t";
147
148 // To know if we already checked a node or not
149 Set< NodeId > visited;
150
151 // FIFO of nodes to visit
152 std::queue< NodeId > fifo;
153
154 // Loading the FIFO
155 fifo.push(optimalPolicy_->root());
156 visited << optimalPolicy_->root();
157
158
159 // ************************************************************************
160 // Main loop
161 while (!fifo.empty()) {
162 // Node to visit
163 NodeId currentNodeId = fifo.front();
164 fifo.pop();
165
166 // Checking if it is terminal
167 if (optimalPolicy_->isTerminalNode(currentNodeId)) {
168 // Get back the associated ActionSet
169 ActionSet ase = optimalPolicy_->nodeValue(currentNodeId);
170
171 // Creating a line for this node
172 terminalStream << tab << currentNodeId << ";" << tab << currentNodeId << " [label=\""
173 << currentNodeId << " - ";
174
175 // Enumerating and adding to the line the associated optimal actions
176 for (SequenceIteratorSafe< Idx > valIter = ase.beginSafe(); valIter != ase.endSafe();
177 ++valIter)
178 terminalStream << fmdp_->actionName(*valIter) << " ";
180 // Terminating line
181 terminalStream << "\"];" << std::endl;
182 continue;
183 }
184
185 // Either wise
186 {
187 // Geting back the associated internal node
188 const InternalNode* currentNode = optimalPolicy_->node(currentNodeId);
189
190 // Creating a line in internalnode stream for this node
191 nonTerminalStream << tab << currentNodeId << ";" << tab << currentNodeId << " [label=\""
192 << currentNodeId << " - " << currentNode->nodeVar()->name() << "\"];"
193 << std::endl;
194
195 // Going through the sons and agregating them according the the sons Ids
197 for (Idx sonIter = 0; sonIter < currentNode->nbSons(); ++sonIter) {
198 if (!visited.exists(currentNode->son(sonIter))) {
199 fifo.push(currentNode->son(sonIter));
200 visited << currentNode->son(sonIter);
201 }
202 if (!sonMap.exists(currentNode->son(sonIter)))
203 sonMap.insert(currentNode->son(sonIter), new LinkedList< Idx >());
204 sonMap[currentNode->son(sonIter)]->addLink(sonIter);
205 }
206
207 // Adding to the arc stram
208 for (auto sonIter = sonMap.beginSafe(); sonIter != sonMap.endSafe(); ++sonIter) {
209 arcstream << tab << currentNodeId << " -> " << sonIter.key() << " [label=\" ";
210 Link< Idx >* modaIter = sonIter.val()->list();
211 while (modaIter) {
212 arcstream << currentNode->nodeVar()->label(modaIter->element());
213 if (modaIter->nextLink()) arcstream << ", ";
214 modaIter = modaIter->nextLink();
215 }
216 arcstream << "\",color=\"#00ff00\"];" << std::endl;
217 delete sonIter.val();
218 }
219 }
220 }
221
222 // Terminating
223 output << terminalStream.str() << std::endl
224 << nonTerminalStream.str() << std::endl
225 << arcstream.str() << std::endl
226 << "}" << std::endl;
227
228 return output.str();
230
231 /* **************************************************************************************************
232 * **/
233 /* ** **/
234 /* ** Planning Methods **/
235 /* ** **/
236 /* **************************************************************************************************
237 * **/
238
239 // ===========================================================================
240 // Initializes data structure needed for making the planning
241 // ===========================================================================
242 template < typename GUM_SCALAR >
244 fmdp_ = fmdp;
245
246 // Determination of the threshold value
248
249 // Establishement of sequence of variable elemination
250 for (auto varIter = fmdp_->beginVariables(); varIter != fmdp_->endVariables(); ++varIter)
251 elVarSeq_ << fmdp_->main2prime(*varIter);
252
253 // Initialisation of the value function
254 vFunction_ = operator_->getFunctionInstance();
255 optimalPolicy_ = operator_->getAggregatorInstance();
257 }
258
259 // ===========================================================================
260 // Performs a value iteration
261 // ===========================================================================
262 template < typename GUM_SCALAR >
264 if (_firstTime_) {
265 this->initVFunction_();
266 _firstTime_ = false;
267 }
268
269 // *****************************************************************************************
270 // Main loop
271 // *****************************************************************************************
272 Idx nbIte = 0;
273 GUM_SCALAR gap = _threshold_ + 1;
274 while ((gap > _threshold_) && (nbIte < nbStep)) {
275 ++nbIte;
276
278
279 // *****************************************************************************************
280 // Then we compare new value function and the old one
281 MultiDimFunctionGraph< GUM_SCALAR >* deltaV = operator_->subtract(newVFunction, vFunction_);
282 gap = 0;
283
284 for (deltaV->beginValues(); deltaV->hasValue(); deltaV->nextValue())
285 if (gap < fabs(deltaV->value())) gap = fabs(deltaV->value());
286 delete deltaV;
287
288 if (verbose_)
289 std::cout << " ------------------- Fin itération n° " << nbIte << std::endl
290 << " Gap : " << gap << " - " << _threshold_ << std::endl;
292 // *****************************************************************************************
293 // And eventually we update pointers for next loop
294 delete vFunction_;
295 vFunction_ = newVFunction;
296 }
297
298 // *****************************************************************************************
299 // Policy matching value function research
300 // *****************************************************************************************
301 this->evalPolicy_();
302 }
303
304 // ===========================================================================
305 // Performs a single step of value iteration
306 // ===========================================================================
307 template < typename GUM_SCALAR >
311
312 /* **************************************************************************************************
313 * **/
314 /* ** **/
315 /* ** Value Iteration Methods **/
316 /* ** **/
317 /* **************************************************************************************************
318 * **/
319
320
321 // ===========================================================================
322 // Performs a single step of value iteration
323 // ===========================================================================
324 template < typename GUM_SCALAR >
326 // *****************************************************************************************
327 // Loop reset
328 MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = operator_->getFunctionInstance();
329 newVFunction->copyAndReassign(*vFunction_, fmdp_->mapMainPrime());
330
331 // *****************************************************************************************
332 // For each action
333 std::vector< MultiDimFunctionGraph< GUM_SCALAR >* > qActionsSet;
334 for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter) {
335 MultiDimFunctionGraph< GUM_SCALAR >* qAction = this->evalQaction_(newVFunction, *actionIter);
336 qActionsSet.push_back(qAction);
337 }
338 delete newVFunction;
339
340 // *****************************************************************************************
341 // Next to evaluate main value function, we take maximise over all action
342 // value, ...
343 newVFunction = this->maximiseQactions_(qActionsSet);
344
345 // *******************************************************************************************
346 // Next, we eval the new function value
347 newVFunction = this->addReward_(newVFunction);
348
349 return newVFunction;
350 }
351
352 // ===========================================================================
353 // Evals the q function for current fmdp action
354 // ===========================================================================
355 template < typename GUM_SCALAR >
356 MultiDimFunctionGraph< GUM_SCALAR >*
358 Idx actionId) {
359 // ******************************************************************************
360 // Initialisation :
361 // Creating a copy of last Vfunction to deduce from the new Qaction
362 // And finding the first var to eleminate (the one at the end)
363
364 return operator_->regress(Vold, actionId, this->fmdp_, this->elVarSeq_);
365 }
366
367 // ===========================================================================
368 // Maximise the AAction to iobtain the vFunction
369 // ===========================================================================
370 template < typename GUM_SCALAR >
372 std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >& qActionsSet) {
373 MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = qActionsSet.back();
374 qActionsSet.pop_back();
375
376 while (!qActionsSet.empty()) {
377 MultiDimFunctionGraph< GUM_SCALAR >* qAction = qActionsSet.back();
378 qActionsSet.pop_back();
379 newVFunction = operator_->maximize(newVFunction, qAction);
380 }
381
382 return newVFunction;
383 }
384
385 // ===========================================================================
386 // Maximise the AAction to iobtain the vFunction
387 // ===========================================================================
388 template < typename GUM_SCALAR >
390 std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >& qActionsSet) {
391 MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = qActionsSet.back();
392 qActionsSet.pop_back();
393
394 while (!qActionsSet.empty()) {
395 MultiDimFunctionGraph< GUM_SCALAR >* qAction = qActionsSet.back();
396 qActionsSet.pop_back();
397 newVFunction = operator_->minimize(newVFunction, qAction);
398 }
399
400 return newVFunction;
401 }
402
403 // ===========================================================================
404 // Updates the value function by multiplying by discount and adding reward
405 // ===========================================================================
406 template < typename GUM_SCALAR >
409 Idx actionId) {
410 // *****************************************************************************************
411 // ... we multiply the result by the discount factor, ...
412 MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = operator_->getFunctionInstance();
413 newVFunction->copyAndMultiplyByScalar(*Vold, this->discountFactor_);
414 delete Vold;
415
416 // *****************************************************************************************
417 // ... and finally add reward
418 newVFunction = operator_->add(newVFunction, RECAST(fmdp_->reward(actionId)));
419
420 return newVFunction;
421 }
422
423 /* **************************************************************************************************
424 * **/
425 /* ** **/
426 /* ** Optimal Policy Evaluation Methods **/
427 /* ** **/
428 /* **************************************************************************************************
429 * **/
430
431 // ===========================================================================
432 // Evals the policy corresponding to the given value function
433 // ===========================================================================
434 template < typename GUM_SCALAR >
436 // *****************************************************************************************
437 // Loop reset
438 MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = operator_->getFunctionInstance();
439 newVFunction->copyAndReassign(*vFunction_, fmdp_->mapMainPrime());
440
441 std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy >* >
442 argMaxQActionsSet;
443 // *****************************************************************************************
444 // For each action
445 for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter) {
446 MultiDimFunctionGraph< GUM_SCALAR >* qAction = this->evalQaction_(newVFunction, *actionIter);
447
448 qAction = this->addReward_(qAction);
449
450 argMaxQActionsSet.push_back(makeArgMax_(qAction, *actionIter));
451 }
452 delete newVFunction;
453
454
455 // *****************************************************************************************
456 // Next to evaluate main value function, we take maximise over all action
457 // value, ...
459 = argmaximiseQactions_(argMaxQActionsSet);
460
461 // *****************************************************************************************
462 // Next to evaluate main value function, we take maximise over all action
463 // value, ...
464 extractOptimalPolicy_(argMaxVFunction);
465 }
466
467 // ===========================================================================
468 // Creates a copy of given in parameter decision Graph and replaces leaves
469 // of that Graph by a pair containing value of the leaf and action to which
470 // is bind this Graph (given in parameter).
471 // ===========================================================================
472 template < typename GUM_SCALAR >
476 Idx actionId) {
478 = operator_->getArgMaxFunctionInstance();
479
480 // Insertion des nouvelles variables
482 = qAction->variablesSequence().beginSafe();
483 varIter != qAction->variablesSequence().endSafe();
484 ++varIter)
485 amcpy->add(**varIter);
486
488 amcpy->manager()->setRootNode(
489 _recurArgMaxCopy_(qAction->root(), actionId, qAction, amcpy, src2dest));
490
491 delete qAction;
492 return amcpy;
493 }
494
495 // ==========================================================================
496 // Recursion part for the createArgMaxCopy
497 // ==========================================================================
498 template < typename GUM_SCALAR >
500 NodeId currentNodeId,
501 Idx actionId,
504 HashTable< NodeId, NodeId >& visitedNodes) {
505 if (visitedNodes.exists(currentNodeId)) return visitedNodes[currentNodeId];
506
507 NodeId nody;
508 if (src->isTerminalNode(currentNodeId)) {
509 ArgMaxSet< GUM_SCALAR, Idx > leaf(src->nodeValue(currentNodeId), actionId);
510 nody = argMaxCpy->manager()->addTerminalNode(leaf);
511 } else {
512 const InternalNode* currentNode = src->node(currentNodeId);
513 NodeId* sonsMap = static_cast< NodeId* >(
514 SOA_ALLOCATE(sizeof(NodeId) * currentNode->nodeVar()->domainSize()));
515 for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
516 sonsMap[moda]
517 = _recurArgMaxCopy_(currentNode->son(moda), actionId, src, argMaxCpy, visitedNodes);
518 nody = argMaxCpy->manager()->addInternalNode(currentNode->nodeVar(), sonsMap);
519 }
520 visitedNodes.insert(currentNodeId, nody);
521 return nody;
522 }
523
524 // ===========================================================================
525 // Performs argmax_a Q(s,a)
526 // ===========================================================================
527 template < typename GUM_SCALAR >
531 SetTerminalNodePolicy >* >& qActionsSet) {
533 = qActionsSet.back();
534 qActionsSet.pop_back();
535
536 while (!qActionsSet.empty()) {
538 = qActionsSet.back();
539 qActionsSet.pop_back();
540 newVFunction = operator_->argmaximize(newVFunction, qAction);
541 }
542
543 return newVFunction;
544 }
545
546 // ===========================================================================
547 // Creates a copy of given in parameter decision Graph and replaces leaves
548 // of that Graph by a pair containing value of the leaf and action to which
549 // is bind this Graph (given in parameter).
550 // ===========================================================================
551 template < typename GUM_SCALAR >
554 argMaxOptimalValueFunction) {
555 optimalPolicy_->clear();
556
557 // Insertion des nouvelles variables
559 = argMaxOptimalValueFunction->variablesSequence().beginSafe();
560 varIter != argMaxOptimalValueFunction->variablesSequence().endSafe();
561 ++varIter)
562 optimalPolicy_->add(**varIter);
563
565 optimalPolicy_->manager()->setRootNode(_recurExtractOptPol_(argMaxOptimalValueFunction->root(),
566 argMaxOptimalValueFunction,
567 src2dest));
568
569 delete argMaxOptimalValueFunction;
570 }
571
572 // ==========================================================================
573 // Recursion part for the createArgMaxCopy
574 // ==========================================================================
575 template < typename GUM_SCALAR >
577 NodeId currentNodeId,
579 argMaxOptVFunc,
580 HashTable< NodeId, NodeId >& visitedNodes) {
581 if (visitedNodes.exists(currentNodeId)) return visitedNodes[currentNodeId];
582
583 NodeId nody;
584 if (argMaxOptVFunc->isTerminalNode(currentNodeId)) {
585 ActionSet leaf;
586 _transferActionIds_(argMaxOptVFunc->nodeValue(currentNodeId), leaf);
587 nody = optimalPolicy_->manager()->addTerminalNode(leaf);
588 } else {
589 const InternalNode* currentNode = argMaxOptVFunc->node(currentNodeId);
590 NodeId* sonsMap = static_cast< NodeId* >(
591 SOA_ALLOCATE(sizeof(NodeId) * currentNode->nodeVar()->domainSize()));
592 for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
593 sonsMap[moda] = _recurExtractOptPol_(currentNode->son(moda), argMaxOptVFunc, visitedNodes);
594 nody = optimalPolicy_->manager()->addInternalNode(currentNode->nodeVar(), sonsMap);
595 }
596 visitedNodes.insert(currentNodeId, nody);
597 return nody;
598 }
599
600 // ==========================================================================
601 // Extract from an ArgMaxSet the associated ActionSet
602 // ==========================================================================
603 template < typename GUM_SCALAR >
605 ActionSet& dest) {
606 for (auto idi = src.beginSafe(); idi != src.endSafe(); ++idi)
607 dest += *idi;
608 }
609
610
611} // end of namespace gum
A class to store the optimal actions.
Definition actionSet.h:105
SequenceIteratorSafe< Idx > endSafe() const
Iterator end.
Definition actionSet.h:166
SequenceIteratorSafe< Idx > beginSafe() const
Iterator beginning.
Definition actionSet.h:161
Class to handle efficiently argMaxSet.
Definition argMaxSet.h:78
SequenceIteratorSafe< GUM_SCALAR_SEQ > beginSafe() const
Iterator beginning.
Definition argMaxSet.h:129
SequenceIteratorSafe< GUM_SCALAR_SEQ > endSafe() const
Iterator end.
Definition argMaxSet.h:134
virtual std::string label(Idx i) const =0
get the indice-th label. This method is pure virtual.
virtual Size domainSize() const =0
void nextValue() const
Increments the constant safe iterator.
void beginValues() const
Initializes the constant safe iterator on terminal nodes.
bool hasValue() const
Indicates if constant safe iterator has reach end of terminal nodes list.
const GUM_SCALAR & value() const
Returns the value of the current terminal nodes pointed by the constant safe iterator.
This class is used to implement factored decision process.
Definition fmdp.h:73
The class for generic Hash Tables.
Definition hashTable.h:637
const iterator_safe & endSafe() noexcept
Returns the safe iterator pointing to the end of the hashtable.
bool exists(const Key &key) const
Checks whether there exists an element with a given key in the hashtable.
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
iterator_safe beginSafe()
Returns the safe iterator pointing to the beginning of the hashtable.
<agrum/FMDP/SDyna/IOperatorStrategy.h>
Structure used to represent a node internal structure.
const DiscreteVariable * nodeVar() const
Returns the node variable.
Idx nbSons() const
Returns the number of sons.
NodeId son(Idx modality) const
Returns the son at a given index.
Chain list allocated using the SmallObjectAllocator.
Definition link.h:155
Class implementing a function graph.
MultiDimFunctionGraphManager< GUM_SCALAR, TerminalNodePolicy > * manager()
Returns a const reference to the manager of this diagram.
const NodeId & root() const
Returns the id of the root node from the diagram.
bool isTerminalNode(const NodeId &node) const
Indicates if given node is terminal or not.
const InternalNode * node(NodeId n) const
Returns internalNode structure associated to that nodeId.
const GUM_SCALAR & nodeValue(NodeId n) const
Returns value associated to given node.
void copyAndMultiplyByScalar(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, GUM_SCALAR gamma)
Copies src diagrams and multiply every value by the given scalar.
void copyAndReassign(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, const Bijection< const DiscreteVariable *, const DiscreteVariable * > &reassign)
Copies src diagrams structure into this diagrams.
virtual void add(const DiscreteVariable &v)
Adds a new var to the variables of the multidimensional matrix.
virtual const Sequence< const DiscreteVariable * > & variablesSequence() const override
Returns a const ref to the sequence of DiscreteVariable*.
Implementation of a Terminal Node Policy that maps nodeid to a set of value.
bool exists(const Key &k) const
Indicates whether a given elements belong to the set.
Definition set_tpl.h:533
virtual void evalPolicy_()
Perform the required tasks to extract an optimal policy.
NodeId _recurExtractOptPol_(NodeId, const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
GUM_SCALAR discountFactor_
Discount Factor used for infinite horizon planning.
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)
Initializes data structure needed for making the planning.
NodeId _recurArgMaxCopy_(NodeId, Idx, const MultiDimFunctionGraph< GUM_SCALAR > *, MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
bool verbose_
Boolean used to indicate whether or not iteration information should be displayed on the terminal.
virtual ~StructuredPlaner()
Default destructor.
virtual void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
void _transferActionIds_(const ArgMaxSet< GUM_SCALAR, Idx > &, ActionSet &)
Extract from an ArgMaxSet the associated ActionSet.
virtual MultiDimFunctionGraph< GUM_SCALAR > * minimiseFunctions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs min_i F_i.
virtual MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * argmaximiseQactions_(std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * > &)
Performs argmax_a Q(s,a).
void extractOptimalPolicy_(const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *optimalValueFunction)
From V(s)* = argmax_a Q*(s,a), this function extract pi*(s) This function mainly consists in extracti...
MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * optimalPolicy_
The associated optimal policy.
virtual MultiDimFunctionGraph< GUM_SCALAR > * addReward_(MultiDimFunctionGraph< GUM_SCALAR > *function, Idx actionId=0)
Perform the R(s) + gamma . function.
IOperatorStrategy< GUM_SCALAR > * operator_
std::string optimalPolicy2String()
Provide a better toDot for the optimal policy where the leaves have the action name instead of its id...
virtual MultiDimFunctionGraph< double > * valueIteration_()
GUM_SCALAR _threshold_
The threshold value Whenever | V^{n} - V^{n+1} | < threshold, we consider that V ~ V*.
StructuredPlaner(IOperatorStrategy< GUM_SCALAR > *opi, GUM_SCALAR discountFactor, GUM_SCALAR epsilon, bool verbose)
Default constructor.
virtual void initVFunction_()
Performs a single step of value iteration.
INLINE const FMDP< GUM_SCALAR > * fmdp()
Returns a const ptr on the Factored Markov Decision Process on which we're planning.
MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * makeArgMax_(const MultiDimFunctionGraph< GUM_SCALAR > *Qaction, Idx actionId)
Creates a copy of given Qaction that can be exploit by a Argmax.
virtual MultiDimFunctionGraph< GUM_SCALAR > * maximiseQactions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs max_a Q(s,a).
MultiDimFunctionGraph< GUM_SCALAR > * vFunction_
The Value Function computed iteratively.
virtual MultiDimFunctionGraph< double > * evalQaction_(const MultiDimFunctionGraph< double > *, Idx)
const std::string & name() const
returns the name of the variable
#define RECAST(x)
Definition fmdp_tpl.h:57
This files contains several function objects that are not (yet) defined in the STL.
Size Idx
Type for indexes.
Definition types.h:79
Size NodeId
Type for node ids.
Header files of gum::Instantiation.
Useful macros for maths.
Headers of MultiDimFunctionGraph.
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
#define SOA_ALLOCATE(x)
Headers of the StructuredPlaner planer class.
Header of the Tensor class.