aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
sdyna.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
49
50// =========================================================================
51#ifndef GUM_SDYNA_H
52#define GUM_SDYNA_H
53// =========================================================================
54#include <agrum/agrum.h>
55// =========================================================================
56// =========================================================================
60
62
63// =========================================================================
64
65namespace gum {
66
77 class SDYNA {
78 // ###################################################################
80 // ###################################################################
82
83 public:
84 // ==========================================================================
86 // ==========================================================================
// Factory: builds an SDYNA agent in SPITI configuration — an ITI
// decision-tree learner using chi2 independence tests, combined with a
// structured value-iteration planner (StructuredPlaner<double>::sviInstance).
// Rewards are not learned per action (actionReward = false).
// NOTE(review): doxygen lines 93/96/98 were stripped from this extract;
// they presumably declared the learner (ls), planner (ps) and decider (ds)
// forwarded to the SDYNA constructor below — confirm against full sdyna.h.
87 static SDYNA* spitiInstance(double attributeSelectionThreshold = 0.99,
88 double discountFactor = 0.9,
89 double epsilon = 1,
90 Idx observationPhaseLenght = 100,
91 Idx nbValueIterationStep = 10) {
92 bool actionReward = false;
// Learner: ITI trees with chi2 tests (LHS declaration lost in extraction,
// presumably `ILearningStrategy* ls`).
94 = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(attributeSelectionThreshold,
95 actionReward);
// Planner: structured value iteration (LHS lost, presumably `ps`).
97 = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
99 return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
100 }
101
102 // ==========================================================================
104 // ==========================================================================
// Factory: builds an SDYNA agent in SPIMDDI configuration — a G-test IMDDI
// learner (incremental multi-valued decision diagrams) plus a SPUMDD
// structured planner. Rewards are not learned per action
// (actionReward = false); the trailing `false` passed to SDYNA and to
// spumddInstance presumably disables verbose mode — confirm against the
// constructor/factory signatures.
// NOTE(review): doxygen lines 112/116/118 were stripped from this extract;
// they presumably declared ls, ps and ds — confirm against full sdyna.h.
105 static SDYNA* spimddiInstance(double attributeSelectionThreshold = 0.99,
106 double similarityThreshold = 0.3,
107 double discountFactor = 0.9,
108 double epsilon = 1,
109 Idx observationPhaseLenght = 100,
110 Idx nbValueIterationStep = 10) {
111 bool actionReward = false;
113 = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
114 actionReward,
115 similarityThreshold);
117 = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon, false);
119 return new SDYNA(ls,
120 ps,
121 ds,
122 observationPhaseLenght,
123 nbValueIterationStep,
124 actionReward,
125 false);
126 }
127
128 // ==========================================================================
130 // ==========================================================================
// Factory: R-Max style exploration with an IMDDI (decision-diagram)
// learner. The AdaptiveRMaxPlaner doubles as decision strategy
// (`ds = rm` below). Per-action rewards are learned (actionReward = true).
// NOTE(review): doxygen lines 138/142/144 were stripped from this extract;
// they presumably declared ls, the `rm` variable initialized below, and
// `ps = rm` — confirm against full sdyna.h.
131 static SDYNA* RMaxMDDInstance(double attributeSelectionThreshold = 0.99,
132 double similarityThreshold = 0.3,
133 double discountFactor = 0.9,
134 double epsilon = 1,
135 Idx observationPhaseLenght = 100,
136 Idx nbValueIterationStep = 10) {
137 bool actionReward = true;
139 = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
140 actionReward,
141 similarityThreshold);
143 = AdaptiveRMaxPlaner::ReducedAndOrderedInstance(ls, discountFactor, epsilon);
// The R-Max planner also decides which action to take.
145 IDecisionStrategy* ds = rm;
146 return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
147 }
148
149 // ==========================================================================
151 // ==========================================================================
// Factory: R-Max style exploration with a G-test ITI decision-tree learner
// and the tree flavour of AdaptiveRMaxPlaner, which again serves as the
// decision strategy (`ds = rm`). Per-action rewards are learned.
// NOTE(review): doxygen lines 158/161 were stripped from this extract;
// they presumably declared ls and `ps = rm` — confirm against full sdyna.h.
152 static SDYNA* RMaxTreeInstance(double attributeSelectionThreshold = 0.99,
153 double discountFactor = 0.9,
154 double epsilon = 1,
155 Idx observationPhaseLenght = 100,
156 Idx nbValueIterationStep = 10) {
157 bool actionReward = true;
159 = new FMDPLearner< GTEST, GTEST, ITILEARNER >(attributeSelectionThreshold, actionReward);
160 AdaptiveRMaxPlaner* rm = AdaptiveRMaxPlaner::TreeInstance(ls, discountFactor, epsilon);
162 IDecisionStrategy* ds = rm;
163 return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
164 }
165
166 // ==========================================================================
168 // ==========================================================================
// Factory: IMDDI (decision-diagram) learner plus SPUMDD planner, with a
// random action-selection strategy. Per-action rewards are learned
// (actionReward = true).
// NOTE(review): doxygen lines 176/180/182 were stripped from this extract;
// they presumably declared ls, ps and ds (ds likely the random decider —
// cf. the "Class to make decision randomly" header this file pulls in) —
// confirm against full sdyna.h.
169 static SDYNA* RandomMDDInstance(double attributeSelectionThreshold = 0.99,
170 double similarityThreshold = 0.3,
171 double discountFactor = 0.9,
172 double epsilon = 1,
173 Idx observationPhaseLenght = 100,
174 Idx nbValueIterationStep = 10) {
175 bool actionReward = true;
177 = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
178 actionReward,
179 similarityThreshold);
181 = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon);
183 return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
184 }
185
186 // ==========================================================================
188 // ==========================================================================
// Factory: chi2-test ITI decision-tree learner plus structured
// value-iteration planner, with a random action-selection strategy.
// Per-action rewards are learned (actionReward = true).
// NOTE(review): doxygen lines 195/198/200 were stripped from this extract;
// they presumably declared ls, ps and ds — confirm against full sdyna.h.
189 static SDYNA* RandomTreeInstance(double attributeSelectionThreshold = 0.99,
190 double discountFactor = 0.9,
191 double epsilon = 1,
192 Idx observationPhaseLenght = 100,
193 Idx nbValueIterationStep = 10) {
194 bool actionReward = true;
196 = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(attributeSelectionThreshold,
197 actionReward);
199 = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
201 return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
202 }
203
205
206 // ###################################################################
208 // ###################################################################
210
211 // ==========================================================================
217 // ==========================================================================
218
219 private:
// Constructor — private: instances are built through the static factory
// methods above. Definition in sdyna.cpp:77.
// NOTE(review): doxygen line 221 is missing from this extract; per the
// symbol index it is `IPlanningStrategy< double >* planer,` — confirm.
// @param learner                the FMDP learning strategy
// @param decider                the action-selection strategy
// @param observationPhaseLenght number of observations made before the
//                               planner is invoked again
// @param nbValueIterationStep   number of value-iteration steps performed
//                               per planning run
// @param actionReward           whether rewards are learned per action
// @param verbose                presumably enables progress output — TODO
//                               confirm in sdyna.cpp
220 SDYNA(ILearningStrategy* learner,
222 IDecisionStrategy* decider,
223 Idx observationPhaseLenght,
224 Idx nbValueIterationStep,
225 bool actionReward,
226 bool verbose = true);
227
228 // ==========================================================================
230 // ==========================================================================
231
232 public:
// Destructor. Per the member index, SDYNA also deletes the observations it
// accumulated in _bin_ on destruction. Definition in sdyna.cpp:98.
233 ~SDYNA();
234
236
237
238 // ###################################################################
240 // ###################################################################
242
243 public:
244 // ==========================================================================
251 // ==========================================================================
// Registers a new action in the learnt FMDP model (delegates to
// FMDP::addAction).
// @param actionId   numeric identifier of the action
// @param actionName human-readable name bound to that identifier
252 void addAction(const Idx actionId, const std::string& actionName) {
253 fmdp_->addAction(actionId, actionName);
254 }
255
256 // ==========================================================================
265 // ==========================================================================
// Registers a new state variable in the learnt FMDP model. The raw pointer
// is handed to the FMDP; presumably ownership stays with the caller and
// `var` must outlive this instance — TODO confirm against FMDP::addVariable.
266 void addVariable(const DiscreteVariable* var) { fmdp_->addVariable(var); }
267
269
270
271 // ###################################################################
273 // ###################################################################
275
276 public:
277 // ==========================================================================
281 // ==========================================================================
// Initializes the SDyna instance (no explicit initial state).
// Definition in sdyna.cpp:117.
282 void initialize();
283
284 // ==========================================================================
290 // ==========================================================================
// Initializes the SDyna instance with the state the system starts from.
// @param initialState the initial state of the environment
291 void initialize(const Instantiation& initialState);
292
294
295
296 // ###################################################################
298 // ###################################################################
300
301 public:
302 // ==========================================================================
309 // ==========================================================================
// Records the given state as the last visited one (lastState_), i.e. the
// origin of the next observed transition.
310 void setCurrentState(const Instantiation& currentState) { lastState_ = currentState; }
311
312 // ==========================================================================
318 // ==========================================================================
// Returns the id of the action the agent chooses when in `curState`.
// Definition in sdyna.cpp (cf. takeAction at sdyna.cpp:234).
319 Idx takeAction(const Instantiation& curState);
320
321 // ==========================================================================
325 // ==========================================================================
// Returns the id of the chosen action, presumably evaluated in the stored
// lastState_ — TODO confirm in sdyna.cpp:234.
326 Idx takeAction();
327
328 // ==========================================================================
337 // ==========================================================================
// Performs a feedback on the last transition: the observed
// (originalState, performedAction) -> reachedState transition and its
// reward are given to the learner. Definition in sdyna.cpp:149.
// @param originalState   state the system was in before the action
// @param reachedState    state reached after the action
// @param performedAction id of the executed action
// @param obtainedReward  reward received for this transition
338 void feedback(const Instantiation& originalState,
339 const Instantiation& reachedState,
340 Idx performedAction,
341 double obtainedReward);
342
343 // ==========================================================================
353 // ==========================================================================
// Feedback variant that presumably uses the internally stored lastState_
// and _lastAction_ as the transition origin — TODO confirm in sdyna.cpp.
354 void feedback(const Instantiation& reachedState, double obtainedReward);
355
356 // ==========================================================================
362 // ==========================================================================
// Starts a new planning phase; nbStep presumably bounds the number of
// value-iteration steps — confirm semantics in sdyna.cpp:206.
363 void makePlanning(Idx nbStep);
364
366
367
368 public:
369 // ==========================================================================
376 // ==========================================================================
// Returns a string representation of this SDYNA instance.
// Definition in sdyna.cpp:248.
377 std::string toString();
378
// Delegates to the planner to render the current optimal policy as a string.
379 std::string optimalPolicy2String() { return _planer_->optimalPolicy2String(); }
380
381 // ###################################################################
385 // ###################################################################
387
388 public:
389 // ==========================================================================
394 // ==========================================================================
// Size of the learner's internal representation (delegates to the
// ILearningStrategy).
395 Size learnerSize() { return _learner_->size(); }
396
397 // ==========================================================================
402 // ==========================================================================
// Size of the learnt FMDP model.
403 Size modelSize() { return fmdp_->size(); }
404
405 // ==========================================================================
410 // ==========================================================================
// Size of the planner's current value function.
411 Size valueFunctionSize() { return _planer_->vFunctionSize(); }
412
413 // ==========================================================================
418 // ==========================================================================
// Size of the planner's current optimal policy.
419 Size optimalPolicySize() { return _planer_->optimalPolicySize(); }
420
422
423
424 protected:
427
430
431 private:
434
437
440
441
444
447
450
453
456
458
460 };
461
462
463} /* namespace gum */
464
465
466#endif // GUM_SDYNA_H
Headers of the epsilon-greedy decision maker class.
Headers of the RMax planer class.
<agrum/FMDP/planning/adaptiveRMaxPlaner.h>
static AdaptiveRMaxPlaner * ReducedAndOrderedInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
static AdaptiveRMaxPlaner * TreeInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
Base class for discrete random variable.
<agrum/FMDP/decision/E_GreedyDecider.h>
<agrum/FMDP/SDyna/IDecisionStrategy.h>
<agrum/FMDP/SDyna/ILearningStrategy.h>
Class for assigning/browsing values to tuples of discrete variables.
Class to make decision randomly.
void initialize()
Initializes the SDyna instance.
Definition sdyna.cpp:117
ILearningStrategy * _learner_
The learner used to learn the FMDP.
Definition sdyna.h:433
Size optimalPolicySize()
optimalPolicySize
Definition sdyna.h:419
Idx _lastAction_
The last performed action.
Definition sdyna.h:452
Idx _nbValueIterationStep_
The number of Value Iteration step we perform.
Definition sdyna.h:449
Instantiation lastState_
The state in which the system is before we perform a new action.
Definition sdyna.h:429
static SDYNA * RandomMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
@
Definition sdyna.h:169
Size modelSize()
modelSize
Definition sdyna.h:403
Size learnerSize()
learnerSize
Definition sdyna.h:395
void setCurrentState(const Instantiation &currentState)
Sets last state visited to the given state.
Definition sdyna.h:310
~SDYNA()
Destructor.
Definition sdyna.cpp:98
static SDYNA * spitiInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
@
Definition sdyna.h:87
Idx takeAction()
Definition sdyna.cpp:234
IPlanningStrategy< double > * _planer_
The planer used to plan an optimal strategy.
Definition sdyna.h:436
FMDP< double > * fmdp_
The learnt Markovian Decision Process.
Definition sdyna.h:426
void addAction(const Idx actionId, const std::string &actionName)
Inserts a new action in the SDyna instance.
Definition sdyna.h:252
Set< Observation * > _bin_
Since SDYNA made these observation, it has to delete them on quitting.
Definition sdyna.h:455
Size valueFunctionSize()
valueFunctionSize
Definition sdyna.h:411
Idx _nbObservation_
The total number of observation made so far.
Definition sdyna.h:446
bool _actionReward_
Definition sdyna.h:457
IDecisionStrategy * _decider_
The decider.
Definition sdyna.h:439
static SDYNA * RMaxMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
@
Definition sdyna.h:131
std::string optimalPolicy2String()
Definition sdyna.h:379
bool verbose_
Definition sdyna.h:459
std::string toString()
Returns a string representation of this SDYNA instance.
Definition sdyna.cpp:248
void feedback(const Instantiation &originalState, const Instantiation &reachedState, Idx performedAction, double obtainedReward)
Performs a feedback on the last transition.
Definition sdyna.cpp:149
void addVariable(const DiscreteVariable *var)
Inserts a new variable in the SDyna instance.
Definition sdyna.h:266
static SDYNA * spimddiInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
@
Definition sdyna.h:105
static SDYNA * RMaxTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
@
Definition sdyna.h:152
void makePlanning(Idx nbStep)
Starts a new planning.
Definition sdyna.cpp:206
Idx _observationPhaseLenght_
The number of observation we make before using again the planer.
Definition sdyna.h:443
SDYNA(ILearningStrategy *learner, IPlanningStrategy< double > *planer, IDecisionStrategy *decider, Idx observationPhaseLenght, Idx nbValueIterationStep, bool actionReward, bool verbose=true)
Constructor.
Definition sdyna.cpp:77
static SDYNA * RandomTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
@
Definition sdyna.h:189
Representation of a set.
Definition set.h:131
static StructuredPlaner< GUM_SCALAR > * sviInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
static StructuredPlaner< GUM_SCALAR > * spumddInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition types.h:74
Size Idx
Type for indexes.
Definition types.h:79
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
Headers of the Random decision maker class.
Headers of the Statistical lazy decision maker class.