aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
adaptiveRMaxPlaner.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
49
50// =========================================================================
51#ifndef GUM_ADAPTIVE_RMAX_PLANER_H
52#define GUM_ADAPTIVE_RMAX_PLANER_H
53// =========================================================================
58
59// =========================================================================
60
61namespace gum {
62
73 class AdaptiveRMaxPlaner: public StructuredPlaner< double >, public IDecisionStrategy {
74 // ###################################################################
76 // ###################################################################
78
79 public:
80 // ==========================================================================
82 // ==========================================================================
84 double discountFactor = 0.9,
85 double epsilon = 0.00001,
86 bool verbose = true) {
88 discountFactor,
89 epsilon,
90 learner,
91 verbose);
92 }
93
94 // ==========================================================================
96 // ==========================================================================
98 double discountFactor = 0.9,
99 double epsilon = 0.00001,
100 bool verbose = true) {
102 discountFactor,
103 epsilon,
104 learner,
105 verbose);
106 }
107
109
110 // ###################################################################
112 // ###################################################################
114
115 private:
116 // ==========================================================================
118 // ==========================================================================
120 double discountFactor,
121 double epsilon,
122 const ILearningStrategy* learner,
123 bool verbose);
124
125 // ==========================================================================
127 // ==========================================================================
128
129 public:
131
133
134
135 // ###################################################################
137 // ###################################################################
139
140 public:
141 // ==========================================================================
147 // ==========================================================================
148 void initialize(const FMDP< double >* fmdp);
149
150
151 // ==========================================================================
160 // ==========================================================================
161 void makePlanning(Idx nbStep = 1000000);
162
164
165
166 // ###################################################################
168 // ###################################################################
170
171 protected:
172 // ==========================================================================
174 // ==========================================================================
175 virtual void initVFunction_();
176
177 // ==========================================================================
179 // ==========================================================================
181
183
184
185 // ###################################################################
187 // ###################################################################
189
190 protected:
191 // ==========================================================================
193 // ==========================================================================
194 virtual void evalPolicy_();
195
197
198 private:
200
201 std::pair< NodeId, NodeId > _visitLearner_(const IVisitableGraphLearner*,
202 NodeId currentNodeId,
205 void _clearTables_();
206
207 private:
211
213 double _rmax_;
214
215
216 // ###################################################################
218 // ###################################################################
220
221 public:
222 void checkState(const Instantiation& newState, Idx actionId) {
223 if (!_initializedTable_[actionId]) {
224 _counterTable_[actionId]->reset(newState);
225 _initializedTable_[actionId] = true;
226 } else _counterTable_[actionId]->incState(newState);
227 }
228
229 private:
232
235 };
236
237} /* namespace gum */
238
239#endif // GUM_ADAPTIVE_RMAX_PLANER_H
Headers of the Decision Strategy interface.
void checkState(const Instantiation &newState, Idx actionId)
HashTable< Idx, bool > _initializedTable_
const ILearningStrategy * _fmdpLearner_
HashTable< Idx, MultiDimFunctionGraph< double > * > _actionsBoolTable_
HashTable< Idx, StatesCounter * > _counterTable_
void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
virtual void initVFunction_()
Initializes the value function.
virtual void evalPolicy_()
Performs the required tasks to extract an optimal policy.
virtual MultiDimFunctionGraph< double > * valueIteration_()
Performs a single step of value iteration.
static AdaptiveRMaxPlaner * ReducedAndOrderedInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
void initialize(const FMDP< double > *fmdp)
Initializes data structure needed for making the planning.
AdaptiveRMaxPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, const ILearningStrategy *learner, bool verbose)
Constructor from an operator strategy, a discount factor, an epsilon, a learning strategy and a verbosity flag.
~AdaptiveRMaxPlaner()
Default destructor.
static AdaptiveRMaxPlaner * TreeInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
HashTable< Idx, MultiDimFunctionGraph< double > * > _actionsRMaxTable_
std::pair< NodeId, NodeId > _visitLearner_(const IVisitableGraphLearner *, NodeId currentNodeId, MultiDimFunctionGraph< double > *, MultiDimFunctionGraph< double > *)
The class for generic Hash Tables.
Definition hashTable.h:637
<agrum/FMDP/SDyna/IDecisionStrategy.h>
<agrum/FMDP/SDyna/ILearningStrategy.h>
<agrum/FMDP/SDyna/IVisitableGraphLearner.h>
Class for assigning/browsing values to tuples of discrete variables.
<agrum/FMDP/planning/mddOperatorStrategy.h>
StructuredPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, bool verbose)
INLINE const FMDP< double > * fmdp()
<agrum/FMDP/planning/treeOperatorStrategy.h>
Headers of the FMDPLearner class.
Size Idx
Type for indexes.
Definition types.h:79
Size NodeId
Type for node ids.
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
Headers of the States Counter class.
Headers of the StructuredPlaner planer class.