aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
sdyna.cpp
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
49
50
51// =========================================================================
52#include <cstdlib>
53// =========================================================================
55
56// =========================================================================
57
58namespace gum {
59
60 // ==========================================================================
61 // Constructor & destructor.
62 // ==========================================================================
63
64 // ###################################################################
65 /*
66 * Constructor
67 *
68 * @param observationPhaseLenght : the number of observation done before a
69 * replanning is launch. If equals 0, a planning is done after each structural
70 * change.
71 * @param nbValueIterationStep : the number of value iteration done during
72 * one planning
73 * @return an instance of SDyna architecture
74 */
75 // ###################################################################
76
79 IDecisionStrategy* decider,
80 Idx observationPhaseLenght,
81 Idx nbValueIterationStep,
82 bool actionReward,
83 bool verbose) :
84 _learner_(learner), _planer_(planer), _decider_(decider),
85 _observationPhaseLenght_(observationPhaseLenght),
86 _nbValueIterationStep_(nbValueIterationStep), _actionReward_(actionReward),
87 verbose_(verbose) {
88 GUM_CONSTRUCTOR(SDYNA);
89
90 fmdp_ = new FMDP< double >();
91
93 }
94
95 // ###################################################################
96 // Destructor
97 // ###################################################################
99 delete _decider_;
100
101 delete _learner_;
102
103 delete _planer_;
104
105 for (auto obsIter = _bin_.beginSafe(); obsIter != _bin_.endSafe(); ++obsIter)
106 delete *obsIter;
107
108 delete fmdp_;
109
110 GUM_DESTRUCTOR(SDYNA);
111 }
112
113 // ==========================================================================
114 // Initialization
115 // ==========================================================================
116
118 _learner_->initialize(fmdp_);
119 _planer_->initialize(fmdp_);
120 _decider_->initialize(fmdp_);
121 }
122
123 // ###################################################################
124 /*
125 * Initializes the Sdyna instance.
126 * @param initialState : the state of the studied system from which we will
127 * begin the explore, learn and exploit process
128 */
129 // ###################################################################
130 void SDYNA::initialize(const Instantiation& initialState) {
131 initialize();
132 setCurrentState(initialState);
133 }
134
135 // ==========================================================================
137 // ==========================================================================
138
139 // ###################################################################
140 /*
141 * Performs a feedback on the last transition.
142 * In extenso, learn from the transition.
143 * @param originalState : the state we were in before the transition
144 * @param reachedState : the state we reached after
145 * @param performedAction : the action we performed
146 * @param obtainedReward : the reward we obtained
147 */
148 // ###################################################################
149 void SDYNA::feedback(const Instantiation& curState,
150 const Instantiation& prevState,
151 Idx lastAction,
152 double reward) {
153 _lastAction_ = lastAction;
154 lastState_ = prevState;
155 feedback(curState, reward);
156 }
157
158 // ###################################################################
159 /*
160 * Performs a feedback on the last transition.
161 * In extenso, learn from the transition.
162 * @param reachedState : the state reached after the transition
163 * @param obtainedReward : the reward obtained during the transition
164 * @warning Uses the _originalState_ and _performedAction_ stored in cache
165 * If you want to specify the original state and the performed action, see
166 * below
167 */
168 // ###################################################################
169 void SDYNA::feedback(const Instantiation& newState, double reward) {
170 Observation* obs = new Observation();
171
172 for (auto varIter = lastState_.variablesSequence().beginSafe();
173 varIter != lastState_.variablesSequence().endSafe();
174 ++varIter)
175 obs->setModality(*varIter, lastState_.val(**varIter));
176
177 for (auto varIter = newState.variablesSequence().beginSafe();
178 varIter != newState.variablesSequence().endSafe();
179 ++varIter) {
180 obs->setModality(fmdp_->main2prime(*varIter), newState.val(**varIter));
181
182 if (this->_actionReward_) obs->setRModality(*varIter, lastState_.val(**varIter));
183 else obs->setRModality(*varIter, newState.val(**varIter));
184 }
185
186 obs->setReward(reward);
187
188 _learner_->addObservation(_lastAction_, obs);
189 _bin_.insert(obs);
190
191 setCurrentState(newState);
192 _decider_->checkState(lastState_, _lastAction_);
193
195
197 }
198
199 // ###################################################################
200 /*
201 * Starts a new planning
202 * @param Idx : the maximal number of value iteration performed in this
203 * planning
204 */
205 // ###################################################################
206 void SDYNA::makePlanning(Idx nbValueIterationStep) {
207 if (verbose_) std::cout << "Updating decision trees ..." << std::endl;
208 _learner_->updateFMDP();
209 // std::cout << << "Done" << std::endl;
210
211 if (verbose_) std::cout << "Planning ..." << std::endl;
212 _planer_->makePlanning(nbValueIterationStep);
213 // std::cout << << "Done" << std::endl;
214
215 _decider_->setOptimalStrategy(_planer_->optimalPolicy());
216 }
217
218 // ##################################################################
219 /*
220 * @return the id of the action the SDyna instance wish to be performed
221 * @param the state in which we currently are
222 */
223 // ###################################################################
225 lastState_ = curState;
226 return takeAction();
227 }
228
229 // ###################################################################
230 /*
231 * @return the id of the action the SDyna instance wish to be performed
232 */
233 // ###################################################################
235 ActionSet actionSet = _decider_->stateOptimalPolicy(lastState_);
236 if (actionSet.size() == 1) {
237 _lastAction_ = actionSet[0];
238 } else {
239 Idx randy = randomValue(actionSet.size());
240 _lastAction_ = actionSet[randy == actionSet.size() ? 0 : randy];
241 }
242 return _lastAction_;
243 }
244
245 // ###################################################################
246 //
247 // ###################################################################
248 std::string SDYNA::toString() {
249 std::stringstream description;
250
251 description << fmdp_->toString() << std::endl;
252 description << _planer_->optimalPolicy2String() << std::endl;
253
254 return description.str();
255 }
256
257} // End of namespace gum
A class to store the optimal actions.
Definition actionSet.h:105
Size size() const
Gives the size.
Definition actionSet.h:224
<agrum/FMDP/SDyna/IDecisionStrategy.h>
<agrum/FMDP/SDyna/ILearningStrategy.h>
Class for assigning/browsing values to tuples of discrete variables.
const Sequence< const DiscreteVariable * > & variablesSequence() const final
Returns the sequence of DiscreteVariable of this instantiation.
Idx val(Idx i) const
Returns the current value of the variable at position i.
void setReward(double reward)
Sets the reward associated with this observation.
INLINE void setRModality(const DiscreteVariable *var, Idx modality)
Sets the reward modality assumed by the given variable in this observation.
INLINE void setModality(const DiscreteVariable *var, Idx modality)
Sets the modality assumed by the given variable in this observation.
void initialize()
Initializes the Sdyna instance.
Definition sdyna.cpp:117
ILearningStrategy * _learner_
The learner used to learn the FMDP.
Definition sdyna.h:433
Idx _lastAction_
The last performed action.
Definition sdyna.h:452
Idx _nbValueIterationStep_
The number of Value Iteration step we perform.
Definition sdyna.h:449
Instantiation lastState_
The state in which the system is before we perform a new action.
Definition sdyna.h:429
void setCurrentState(const Instantiation &currentState)
Sets last state visited to the given state.
Definition sdyna.h:310
~SDYNA()
Destructor.
Definition sdyna.cpp:98
Idx takeAction()
Definition sdyna.cpp:234
IPlanningStrategy< double > * _planer_
The planer used to plan an optimal strategy.
Definition sdyna.h:436
FMDP< double > * fmdp_
The learnt Markovian Decision Process.
Definition sdyna.h:426
Set< Observation * > _bin_
Since SDYNA made these observation, it has to delete them on quitting.
Definition sdyna.h:455
Idx _nbObservation_
The total number of observation made so far.
Definition sdyna.h:446
bool _actionReward_
Definition sdyna.h:457
IDecisionStrategy * _decider_
The decider.
Definition sdyna.h:439
bool verbose_
Definition sdyna.h:459
std::string toString()
Returns a string describing the learnt FMDP and the computed optimal policy.
Definition sdyna.cpp:248
void feedback(const Instantiation &originalState, const Instantiation &reachedState, Idx performedAction, double obtainedReward)
Performs a feedback on the last transition.
Definition sdyna.cpp:149
void makePlanning(Idx nbStep)
Starts a new planning.
Definition sdyna.cpp:206
Idx _observationPhaseLenght_
The number of observation we make before using again the planer.
Definition sdyna.h:443
SDYNA(ILearningStrategy *learner, IPlanningStrategy< double > *planer, IDecisionStrategy *decider, Idx observationPhaseLenght, Idx nbValueIterationStep, bool actionReward, bool verbose=true)
Constructor.
Definition sdyna.cpp:77
Size Idx
Type for indexes.
Definition types.h:79
Idx randomValue(const Size max=2)
Returns a random Idx between 0 and max-1 included.
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
Headers of the SDyna abstract class.