aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
adaptiveRMaxPlaner.cpp
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
49
50// =========================================================================
51#include <queue>
52#include <vector>
53// #include <algorithm>
54// #include <utility>
55// =========================================================================
58
60// =========================================================================
64// =========================================================================
66// =========================================================================
67
69#define RECASTED(x) reinterpret_cast< const MultiDimFunctionGraph< double >* >(x)
70
71namespace gum {
72
73 /* **************************************************************************************************
74 * **/
75 /* ** **/
76 /* ** Constructors / Destructors **/
77 /* ** **/
78 /* **************************************************************************************************
79 * **/
80
81 // ===========================================================================
82 // Default constructor
83 // ===========================================================================
85 double discountFactor,
86 double epsilon,
87 const ILearningStrategy* learner,
88 bool verbose) :
89 StructuredPlaner(opi, discountFactor, epsilon, verbose), IDecisionStrategy(),
90 _fmdpLearner_(learner), _initialized_(false) {
91 GUM_CONSTRUCTOR(AdaptiveRMaxPlaner);
92 }
93
94 // ===========================================================================
95 // Default destructor
96 // ===========================================================================
98 GUM_DESTRUCTOR(AdaptiveRMaxPlaner);
99
101 scIter != _counterTable_.endSafe();
102 ++scIter)
103 delete scIter.val();
104 }
105
106 /* **************************************************************************************************
107 * **/
108 /* ** **/
109 /* ** Planning Methods **/
110 /* ** **/
111 /* **************************************************************************************************
112 * **/
113
114 // ==========================================================================
115 // Initializes data structure needed for making the planning
116 // ==========================================================================
118 if (!_initialized_) {
121 for (auto actionIter = fmdp->beginActions(); actionIter != fmdp->endActions(); ++actionIter) {
122 _counterTable_.insert(*actionIter, new StatesCounter());
123 _initializedTable_.insert(*actionIter, false);
124 }
125 _initialized_ = true;
126 }
127 }
128
129 // ===========================================================================
130 // Performs a value iteration
131 // ===========================================================================
139
140 /* **************************************************************************************************
141 * **/
142 /* ** **/
143 /* ** Value Iteration Methods **/
144 /* ** **/
145 /* **************************************************************************************************
146 * **/
147
148 // ===========================================================================
149 // Performs a single step of value iteration
150 // ===========================================================================
152 vFunction_->manager()->setRootNode(vFunction_->manager()->addTerminalNode(0.0));
153 for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter)
154 vFunction_ = this->operator_->add(vFunction_, RECASTED(this->fmdp_->reward(*actionIter)), 1);
155 }
156
157 // ===========================================================================
158 // Performs a single step of value iteration
159 // ===========================================================================
161 // *****************************************************************************************
162 // Loop reset
163 MultiDimFunctionGraph< double >* newVFunction = operator_->getFunctionInstance();
164 newVFunction->copyAndReassign(*vFunction_, fmdp_->mapMainPrime());
165
166 // *****************************************************************************************
167 // For each action
168 std::vector< MultiDimFunctionGraph< double >* > qActionsSet;
169 for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter) {
170 MultiDimFunctionGraph< double >* qAction = evalQaction_(newVFunction, *actionIter);
171
172 // *******************************************************************************************
173 // Next, we add the reward
174 qAction = addReward_(qAction, *actionIter);
175
176 qAction = this->operator_->maximize(
177 _actionsRMaxTable_[*actionIter],
178 this->operator_->multiply(qAction, _actionsBoolTable_[*actionIter], 1),
179 2);
180
181 qActionsSet.push_back(qAction);
182 }
183 delete newVFunction;
184
185 // *****************************************************************************************
186 // Next to evaluate main value function, we take maximise over all action
187 // value, ...
188 newVFunction = maximiseQactions_(qActionsSet);
189
190 return newVFunction;
191 }
192
193 /* **************************************************************************************************
194 * **/
195 /* ** **/
196 /* ** Optimal Policy Evaluation Methods **/
197 /* ** **/
198 /* **************************************************************************************************
199 * **/
200
201 // ===========================================================================
202 // Evals the policy corresponding to the given value function
203 // ===========================================================================
205 // *****************************************************************************************
206 // Loop reset
207 MultiDimFunctionGraph< double >* newVFunction = operator_->getFunctionInstance();
208 newVFunction->copyAndReassign(*vFunction_, fmdp_->mapMainPrime());
209
210 std::vector< MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy >* >
211 argMaxQActionsSet;
212 // *****************************************************************************************
213 // For each action
214 for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter) {
215 MultiDimFunctionGraph< double >* qAction = this->evalQaction_(newVFunction, *actionIter);
216
217 qAction = this->addReward_(qAction, *actionIter);
218
219 qAction = this->operator_->maximize(
220 _actionsRMaxTable_[*actionIter],
221 this->operator_->multiply(qAction, _actionsBoolTable_[*actionIter], 1),
222 2);
223
224 argMaxQActionsSet.push_back(makeArgMax_(qAction, *actionIter));
225 }
226 delete newVFunction;
227
228 // *****************************************************************************************
229 // Next to evaluate main value function, we take maximise over all action
230 // value, ...
232 = argmaximiseQactions_(argMaxQActionsSet);
233
234 // *****************************************************************************************
235 // Next to evaluate main value function, we take maximise over all action
236 // value, ...
237 extractOptimalPolicy_(argMaxVFunction);
238 }
239
240 // ===========================================================================
241 //
242 // ===========================================================================
244 _rThreshold_ = _fmdpLearner_->modaMax() * 5 > 30 ? _fmdpLearner_->modaMax() * 5 : 30;
245 _rmax_ = _fmdpLearner_->rMax() / (1.0 - this->discountFactor_);
246
247 for (auto actionIter = this->fmdp()->beginActions(); actionIter != this->fmdp()->endActions();
248 ++actionIter) {
249 std::vector< MultiDimFunctionGraph< double >* > rmaxs;
250 std::vector< MultiDimFunctionGraph< double >* > boolQs;
251
252 for (auto varIter = this->fmdp()->beginVariables(); varIter != this->fmdp()->endVariables();
253 ++varIter) {
254 const IVisitableGraphLearner* visited = _counterTable_[*actionIter];
255
256 MultiDimFunctionGraph< double >* varRMax = this->operator_->getFunctionInstance();
257 MultiDimFunctionGraph< double >* varBoolQ = this->operator_->getFunctionInstance();
258
259 visited->insertSetOfVars(varRMax);
260 visited->insertSetOfVars(varBoolQ);
261
262 std::pair< NodeId, NodeId > rooty
263 = _visitLearner_(visited, visited->root(), varRMax, varBoolQ);
264 varRMax->manager()->setRootNode(rooty.first);
265 varRMax->manager()->reduce();
266 varRMax->manager()->clean();
267 varBoolQ->manager()->setRootNode(rooty.second);
268 varBoolQ->manager()->reduce();
269 varBoolQ->manager()->clean();
270
271 rmaxs.push_back(varRMax);
272 boolQs.push_back(varBoolQ);
273
274 // std::cout << RECASTED(this->fmdp_->transition(*actionIter,
275 // *varIter))->toDot() << std::endl;
276 // for( auto varIter2 =
277 // RECASTED(this->fmdp_->transition(*actionIter,
278 // *varIter))->variablesSequence().beginSafe(); varIter2 !=
279 // RECASTED(this->fmdp_->transition(*actionIter,
280 // *varIter))->variablesSequence().endSafe(); ++varIter2 )
281 // std::cout << (*varIter2)->name() << " | ";
282 // std::cout << std::endl;
283
284 // std::cout << varRMax->toDot() << std::endl;
285 // for( auto varIter =
286 // varRMax->variablesSequence().beginSafe(); varIter !=
287 // varRMax->variablesSequence().endSafe(); ++varIter )
288 // std::cout << (*varIter)->name() << " | ";
289 // std::cout << std::endl;
290
291 // std::cout << varBoolQ->toDot() << std::endl;
292 // for( auto varIter =
293 // varBoolQ->variablesSequence().beginSafe(); varIter !=
294 // varBoolQ->variablesSequence().endSafe(); ++varIter )
295 // std::cout << (*varIter)->name() << " | ";
296 // std::cout << std::endl;
297 }
298
299 // std::cout << "Maximising" << std::endl;
300 _actionsRMaxTable_.insert(*actionIter, this->maximiseQactions_(rmaxs));
301 _actionsBoolTable_.insert(*actionIter, this->minimiseFunctions_(boolQs));
302 }
303 }
304
305 // ===========================================================================
306 //
307 // ===========================================================================
308 std::pair< NodeId, NodeId >
310 NodeId currentNodeId,
313 std::pair< NodeId, NodeId > rep;
314 if (visited->isTerminal(currentNodeId)) {
315 rep.first = rmax->manager()->addTerminalNode(
316 visited->nodeNbObservation(currentNodeId) < _rThreshold_ ? _rmax_ : 0.0);
317 rep.second = boolQ->manager()->addTerminalNode(
318 visited->nodeNbObservation(currentNodeId) < _rThreshold_ ? 0.0 : 1.0);
319 return rep;
320 }
321
322 auto rmaxsons = static_cast< NodeId* >(
323 SOA_ALLOCATE(sizeof(NodeId) * visited->nodeVar(currentNodeId)->domainSize()));
324 auto bqsons = static_cast< NodeId* >(
325 SOA_ALLOCATE(sizeof(NodeId) * visited->nodeVar(currentNodeId)->domainSize()));
326
327 for (Idx moda = 0; moda < visited->nodeVar(currentNodeId)->domainSize(); ++moda) {
328 std::pair< NodeId, NodeId > sonp
329 = _visitLearner_(visited, visited->nodeSon(currentNodeId, moda), rmax, boolQ);
330 rmaxsons[moda] = sonp.first;
331 bqsons[moda] = sonp.second;
332 }
333
334 rep.first = rmax->manager()->addInternalNode(visited->nodeVar(currentNodeId), rmaxsons);
335 rep.second = boolQ->manager()->addInternalNode(visited->nodeVar(currentNodeId), bqsons);
336 return rep;
337 }
338
339 // ===========================================================================
340 //
341 // ===========================================================================
343 for (auto actionIter = this->fmdp()->beginActions(); actionIter != this->fmdp()->endActions();
344 ++actionIter) {
345 delete _actionsBoolTable_[*actionIter];
346 delete _actionsRMaxTable_[*actionIter];
347 }
348 _actionsRMaxTable_.clear();
349 _actionsBoolTable_.clear();
350 }
351
352} // end of namespace gum
#define RECASTED(x)
For shorter lines, and hence more comprehensible code, only.
Headers of the RMax planer class.
Safe Iterators for hashtables.
HashTable< Idx, bool > _initializedTable_
const ILearningStrategy * _fmdpLearner_
HashTable< Idx, MultiDimFunctionGraph< double > * > _actionsBoolTable_
HashTable< Idx, StatesCounter * > _counterTable_
void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
virtual void initVFunction_()
Performs a single step of value iteration.
virtual void evalPolicy_()
Perform the required tasks to extract an optimal policy.
virtual MultiDimFunctionGraph< double > * valueIteration_()
Performs a single step of value iteration.
void initialize(const FMDP< double > *fmdp)
Initializes data structure needed for making the planning.
AdaptiveRMaxPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, const ILearningStrategy *learner, bool verbose)
Default constructor.
~AdaptiveRMaxPlaner()
Default destructor.
HashTable< Idx, MultiDimFunctionGraph< double > * > _actionsRMaxTable_
std::pair< NodeId, NodeId > _visitLearner_(const IVisitableGraphLearner *, NodeId currentNodeId, MultiDimFunctionGraph< double > *, MultiDimFunctionGraph< double > *)
virtual Size domainSize() const =0
SequenceIteratorSafe< const DiscreteVariable * > endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition fmdp.h:116
SequenceIteratorSafe< Idx > endActions() const
Returns an iterator reference to the end of the list of actions.
Definition fmdp.h:156
<agrum/FMDP/SDyna/IDecisionStrategy.h>
virtual void initialize(const FMDP< double > *fmdp)
Initializes the learner.
<agrum/FMDP/SDyna/ILearningStrategy.h>
<agrum/FMDP/SDyna/IVisitableGraphLearner.h>
virtual const DiscreteVariable * nodeVar(NodeId ni) const =0
virtual NodeId root() const =0
virtual bool isTerminal(NodeId ni) const =0
virtual void insertSetOfVars(MultiDimFunctionGraph< double > *) const =0
virtual NodeId nodeSon(NodeId ni, Idx modality) const =0
virtual Idx nodeNbObservation(NodeId ni) const =0
void clean()
Removes var without nodes in the diagram.
void setRootNode(const NodeId &root)
Sets root node of decision diagram.
NodeId addTerminalNode(const GUM_SCALAR &value)
Adds a value to the MultiDimFunctionGraph.
virtual void reduce()=0
Ensures that every isomorphic subgraphs are merged together.
NodeId addInternalNode(const DiscreteVariable *var)
Inserts a new non terminal node in graph.
MultiDimFunctionGraphManager< GUM_SCALAR, TerminalNodePolicy > * manager()
Returns a const reference to the manager of this diagram.
void copyAndReassign(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, const Bijection< const DiscreteVariable *, const DiscreteVariable * > &reassign)
Copies src diagrams structure into this diagrams.
Implementation of a Terminal Node Policy that maps nodeid to a set of value.
<agrum/FMDP/simulation/statesCounter.h>
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)
Initializes data structure needed for making the planning.
virtual void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
virtual MultiDimFunctionGraph< double > * minimiseFunctions_(std::vector< MultiDimFunctionGraph< double > * > &)
virtual MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > * argmaximiseQactions_(std::vector< MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > * > &)
void extractOptimalPolicy_(const MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > *optimalValueFunction)
virtual MultiDimFunctionGraph< double > * addReward_(MultiDimFunctionGraph< double > *function, Idx actionId=0)
IOperatorStrategy< double > * operator_
StructuredPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, bool verbose)
INLINE const FMDP< double > * fmdp()
MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > * makeArgMax_(const MultiDimFunctionGraph< double > *Qaction, Idx actionId)
virtual MultiDimFunctionGraph< double > * maximiseQactions_(std::vector< MultiDimFunctionGraph< double > * > &)
MultiDimFunctionGraph< double > * vFunction_
virtual MultiDimFunctionGraph< double > * evalQaction_(const MultiDimFunctionGraph< double > *, Idx)
This files contains several function objects that are not (yet) defined in the STL.
Size Idx
Type for indexes.
Definition types.h:79
Size NodeId
Type for node ids.
Header files of gum::Instantiation.
Useful macros for maths.
Headers of MultiDimFunctionGraph.
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
Headers of gum::SmallObjectAllocator.
#define SOA_ALLOCATE(x)
Header of the Tensor class.