static SDYNA* spitiInstance(double attributeSelectionThreshold = 0.99,
                            double discountFactor = 0.9,
                            double epsilon = 1,
                            Idx observationPhaseLenght = 100,
                            Idx nbValueIterationStep = 10) {
  bool actionReward = false;
  // ... builds the learner ls, the planer ps and the decider ds ...
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}

static SDYNA* spimddiInstance(double attributeSelectionThreshold = 0.99,
                              double similarityThreshold = 0.3,
                              double discountFactor = 0.9,
                              double epsilon = 1,
                              Idx observationPhaseLenght = 100,
                              Idx nbValueIterationStep = 10) {
  bool actionReward = false;
  // ... builds the learner ls (parameterized by attributeSelectionThreshold and
  // similarityThreshold), the planer ps and the decider ds ...
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}

static SDYNA* RMaxMDDInstance(double attributeSelectionThreshold = 0.99,
                              double similarityThreshold = 0.3,
                              double discountFactor = 0.9,
                              double epsilon = 1,
                              Idx observationPhaseLenght = 100,
                              Idx nbValueIterationStep = 10) {
  bool actionReward = true;
  // ... builds the learner ls (parameterized by attributeSelectionThreshold and
  // similarityThreshold), the planer ps and the decider ds ...
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}

static SDYNA* RMaxTreeInstance(double attributeSelectionThreshold = 0.99,
                               double discountFactor = 0.9,
                               double epsilon = 1,
                               Idx observationPhaseLenght = 100,
                               Idx nbValueIterationStep = 10) {
  bool actionReward = true;
  // ... builds the learner ls, the planer ps and the decider ds ...
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}

static SDYNA* RandomMDDInstance(double attributeSelectionThreshold = 0.99,
                                double similarityThreshold = 0.3,
                                double discountFactor = 0.9,
                                double epsilon = 1,
                                Idx observationPhaseLenght = 100,
                                Idx nbValueIterationStep = 10) {
  bool actionReward = true;
  // ... builds the learner ls (parameterized by attributeSelectionThreshold and
  // similarityThreshold), the planer ps and the decider ds ...
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}

static SDYNA* RandomTreeInstance(double attributeSelectionThreshold = 0.99,
                                 double discountFactor = 0.9,
                                 double epsilon = 1,
                                 Idx observationPhaseLenght = 100,
                                 Idx nbValueIterationStep = 10) {
  bool actionReward = true;
  // ... builds the learner ls, the planer ps and the decider ds ...
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}

SDYNA(ILearningStrategy* learner,
      IPlanningStrategy< double >* planer,
      IDecisionStrategy* decider,
      Idx observationPhaseLenght,
      Idx nbValueIterationStep,
      bool actionReward,
      bool verbose = true);

void addAction(const Idx actionId, const std::string& actionName) {
  // forwards to the learnt FMDP
  fmdp_->addAction(actionId, actionName);
}

void feedback(const Instantiation& originalState,
              const Instantiation& reachedState,
              Idx performedAction,
              double obtainedReward);
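Taken together, these factories suggest the following usage pattern. The sketch below is a minimal, hypothetical driver: only spitiInstance, addVariable, addAction, initialize, setCurrentState and feedback come from the members documented on this page; the header paths, the LabelizedVariable state description, and the action-selection and environment steps are assumptions for illustration.

#include <agrum/FMDP/SDyna/sdyna.h>                   // assumed header for SDYNA
#include <agrum/tools/variables/labelizedVariable.h>  // assumed concrete DiscreteVariable

int main() {
  // Build a SDYNA agent with the SPITI setup (actionReward = false internally).
  gum::SDYNA* agent = gum::SDYNA::spitiInstance();

  // Describe the system: one binary state variable and two actions.
  gum::LabelizedVariable light("light", "is the light on?", 2);
  agent->addVariable(&light);
  agent->addAction(1, "switchOn");
  agent->addAction(2, "switchOff");

  agent->initialize();

  // Interaction loop; the environment calls are placeholders.
  gum::Instantiation state;
  state.add(light);
  agent->setCurrentState(state);

  for (gum::Idx step = 0; step < 1000; ++step) {
    gum::Idx action = 1;              // placeholder: query the agent's policy here
    gum::Instantiation next = state;  // placeholder: apply the action in the environment
    double reward = 0.0;              // placeholder: observe the reward signal
    agent->feedback(state, next, action, reward);
    state = next;
  }

  delete agent;
  return 0;
}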
Headers of the epsilon-greedy decision maker class.
Headers of the RMax planer class.
<agrum/FMDP/planning/adaptiveRMaxPlaner.h>
static AdaptiveRMaxPlaner * ReducedAndOrderedInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
static AdaptiveRMaxPlaner * TreeInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
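The two factories appear to differ only in the shape of the value-function representation (the names suggest reduced ordered decision diagrams versus trees). A minimal sketch of selecting between them, assuming the learner pointer comes from an already-configured FMDP learning setup:

#include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
#include <agrum/FMDP/SDyna/ILearningStrategy.h>

// Picks an RMax planer backed either by a (reduced, ordered) MDD or by a tree.
gum::AdaptiveRMaxPlaner* makeRMaxPlaner(const gum::ILearningStrategy* learner,
                                        bool useMDD) {
  if (useMDD)
    return gum::AdaptiveRMaxPlaner::ReducedAndOrderedInstance(learner, 0.9, 0.00001, false);
  return gum::AdaptiveRMaxPlaner::TreeInstance(learner, 0.9, 0.00001, false);
}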
Base class for discrete random variables.
<agrum/FMDP/decision/E_GreedyDecider.h>
<agrum/FMDP/SDyna/IDecisionStrategy.h>
<agrum/FMDP/SDyna/ILearningStrategy.h>
Class for assigning/browsing values to tuples of discrete variables.
Class to make decisions randomly.
void initialize()
Initializes the SDYNA instance.
ILearningStrategy * _learner_
The learner used to learn the FMDP.
Size optimalPolicySize()
Returns the size of the learnt optimal policy.
Idx _lastAction_
The last performed action.
Idx _nbValueIterationStep_
The number of value iteration steps we perform.
Instantiation lastState_
The state the system is in before we perform a new action.
static SDYNA * RandomMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds a SDYNA instance using the random decision maker and an MDD-shaped model.
Size modelSize()
Returns the size of the learnt model.
Size learnerSize()
Returns the size of the learner's data structures.
void setCurrentState(const Instantiation &currentState)
Sets last state visited to the given state.
static SDYNA * spitiInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds a SDYNA instance running the SPITI setup: tree-shaped model learning with epsilon-greedy decisions.
IPlanningStrategy< double > * _planer_
The planer used to plan an optimal strategy.
FMDP< double > * fmdp_
The learnt Markovian Decision Process.
void addAction(const Idx actionId, const std::string &actionName)
Inserts a new action into the SDYNA instance.
Set< Observation * > _bin_
Since SDYNA made these observations, it has to delete them on quitting.
Size valueFunctionSize()
Returns the size of the learnt value function.
Idx _nbObservation_
The total number of observations made so far.
IDecisionStrategy * _decider_
The decider used to choose the actions to perform.
static SDYNA * RMaxMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds a SDYNA instance running RMax with an MDD-shaped model.
std::string optimalPolicy2String()
Returns a string representation of the optimal policy.
std::string toString()
Returns a string representation of this SDYNA instance.
void feedback(const Instantiation &originalState, const Instantiation &reachedState, Idx performedAction, double obtainedReward)
Performs a feedback on the last transition.
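feedback() drives the whole SDYNA loop: each observed transition is handed to the learner, and after every _observationPhaseLenght_ observations the planer runs again for _nbValueIterationStep_ steps. The sketch below, a hypothetical helper built only from the members documented here (the sdyna.h path and the choice of 10 steps are assumptions), shows handing over one transition and then forcing an immediate planning pass via makePlanning():

#include <iostream>
#include <agrum/FMDP/SDyna/sdyna.h>   // assumed header for SDYNA

// Hands one observed transition to SDYNA, then forces an immediate planning
// pass instead of waiting for the current observation phase to end.
void observeAndReplan(gum::SDYNA& agent,
                      const gum::Instantiation& before,
                      const gum::Instantiation& after,
                      gum::Idx performedAction,
                      double reward) {
  agent.feedback(before, after, performedAction, reward);
  agent.makePlanning(10);                       // 10 extra value-iteration steps
  std::cout << agent.toString() << std::endl;   // inspect the learnt model
}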
void addVariable(const DiscreteVariable *var)
Inserts a new variable into the SDYNA instance.
static SDYNA * spimddiInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds a SDYNA instance running the SPIMDDI setup: MDD-shaped model learning with epsilon-greedy decisions.
static SDYNA * RMaxTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds a SDYNA instance running RMax with a tree-shaped model.
void makePlanning(Idx nbStep)
Starts a new planning phase.
Idx _observationPhaseLenght_
The number of observations we make before calling the planer again.
SDYNA(ILearningStrategy *learner, IPlanningStrategy< double > *planer, IDecisionStrategy *decider, Idx observationPhaseLenght, Idx nbValueIterationStep, bool actionReward, bool verbose=true)
Constructor.
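The static factories above are thin wrappers around this constructor, and the three strategies can also be assembled by hand. A sketch under stated assumptions: the learner is caller-provided, the planer comes from the documented StructuredPlaner::spumddInstance factory, E_GreedyDecider is assumed default-constructible (its constructor is not shown on this page), and the header paths for sdyna.h and structuredPlaner.h are guesses:

#include <agrum/FMDP/SDyna/sdyna.h>                 // assumed header for SDYNA
#include <agrum/FMDP/decision/E_GreedyDecider.h>
#include <agrum/FMDP/planning/structuredPlaner.h>   // assumed header for StructuredPlaner

// Builds a SDYNA agent from hand-picked strategies: a caller-provided learner,
// a SPUMDD structured planer and an epsilon-greedy decider.
gum::SDYNA* makeCustomAgent(gum::ILearningStrategy* learner) {
  gum::IPlanningStrategy< double >* planer
      = gum::StructuredPlaner< double >::spumddInstance(0.9, 0.00001, false);
  gum::IDecisionStrategy* decider = new gum::E_GreedyDecider();  // assumed default-constructible
  return new gum::SDYNA(learner, planer, decider,
                        /* observationPhaseLenght = */ 100,
                        /* nbValueIterationStep   = */ 10,
                        /* actionReward           = */ false);
}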
static SDYNA * RandomTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
@
static StructuredPlaner< GUM_SCALAR > * sviInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
static StructuredPlaner< GUM_SCALAR > * spumddInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Size Idx
Type for indexes.
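Both aliases appear throughout the signatures above; a two-line illustration (the header path is an assumption):

#include <agrum/tools/core/types.h>   // assumed location of the Size/Idx aliases

gum::Size nbObservations = 0;   // sizes and hashed values
gum::Idx  actionId       = 1;   // indexes, e.g. the ids passed to addAction()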
gum is the global namespace for all aGrUM entities.
Headers of the Random decision maker class.
Headers of the Statistical lazy decision maker class.