aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
E_GreedyDecider.cpp
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
49
50// =========================================================================
51#include <cstdlib>
52// =========================================================================
55
56// =========================================================================
57
58namespace gum {
59
60 // ==========================================================================
61 // Constructor & destructor.
62 // ==========================================================================
63
64 // ###################################################################
68 // ###################################################################
70 GUM_CONSTRUCTOR(E_GreedyDecider);
71
72 _sss_ = 1.0;
73 }
74
75 // ###################################################################
79 // ###################################################################
81 GUM_DESTRUCTOR(E_GreedyDecider);
82 ;
83 }
84
85 // ==========================================================================
86 // Initialization
87 // ==========================================================================
88
89 // ###################################################################
93 // ###################################################################
96 for (auto varIter = fmdp->beginVariables(); varIter != fmdp->endVariables(); ++varIter)
97 _sss_ *= (double)(*varIter)->domainSize();
98 }
99
100 // ==========================================================================
101 // Incremental methods
102 // ==========================================================================
103
104 // ###################################################################
105 /*
106 * Performs a feedback on the last transition.
107 * In extenso, learn from the transition.
108 * @param reachedState : the state reached after the transition
109 */
110 // ###################################################################
111 void E_GreedyDecider::checkState(const Instantiation& reachedState, Idx actionId) {
112 if (_statecpt_.nbVisitedStates() == 0) _statecpt_.reset(reachedState);
113 else if (!_statecpt_.checkState(reachedState)) _statecpt_.addState(reachedState);
114 }
115
116 // ###################################################################
117 /*
118 * @param curState : the state in which we currently are
119 * @return a set containing every optimal action for that state
120 */
121 // ###################################################################
123 double explo = randomProba();
124 double temp = std::pow((_sss_ - (double)_statecpt_.nbVisitedStates()) / _sss_, 3.0);
125 double exploThreshold = temp < 0.1 ? 0.1 : temp;
126
127 // std::cout << exploThreshold << std::endl;
128
130 if (explo > exploThreshold) {
131 // std::cout << "Exploit : " << optimalSet << std::endl;
132 return optimalSet;
133 }
134
135 if (allActions_.size() > optimalSet.size()) {
137 ret -= optimalSet;
138 // std::cout << "Explore : " << ret << std::endl;
139 return ret;
140 }
141
142 // std::cout << "Explore : " << allActions_ << std::endl;
143 return allActions_;
144 }
145
146} // End of namespace gum
Headers of the epsilon-greedy decision maker class.
A class to store the optimal actions.
Definition actionSet.h:105
Size size() const
Gives the size.
Definition actionSet.h:224
void initialize(const FMDP< double > *fmdp)
Initializes the learner.
E_GreedyDecider()
Constructor.
ActionSet stateOptimalPolicy(const Instantiation &curState)
~E_GreedyDecider()
Destructor.
void checkState(const Instantiation &newState, Idx actionId)
SequenceIteratorSafe< const DiscreteVariable * > endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition fmdp.h:116
SequenceIteratorSafe< const DiscreteVariable * > beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition fmdp.h:109
virtual void initialize(const FMDP< double > *fmdp)
Initializes the learner.
virtual ActionSet stateOptimalPolicy(const Instantiation &curState)
Class for assigning/browsing values to tuples of discrete variables.
Size Idx
Type for indexes.
Definition types.h:79
double randomProba()
Returns a random double between 0 and 1 included (i.e. in the closed interval [0,1]).
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
Contains useful methods for random stuff.