80 Idx observationPhaseLenght,
81 Idx nbValueIterationStep,
88 GUM_CONSTRUCTOR(
SDYNA);
105 for (
auto obsIter =
_bin_.beginSafe(); obsIter !=
_bin_.endSafe(); ++obsIter)
110 GUM_DESTRUCTOR(
SDYNA);
172 for (
auto varIter =
lastState_.variablesSequence().beginSafe();
173 varIter !=
lastState_.variablesSequence().endSafe();
207 if (
verbose_) std::cout <<
"Updating decision trees ..." << std::endl;
211 if (
verbose_) std::cout <<
"Planning ..." << std::endl;
212 _planer_->makePlanning(nbValueIterationStep);
236 if (actionSet.
size() == 1) {
249 std::stringstream description;
251 description <<
fmdp_->toString() << std::endl;
252 description <<
_planer_->optimalPolicy2String() << std::endl;
254 return description.str();
A class to store the optimal actions.
Size size() const
Gives the size.
<agrum/FMDP/SDyna/IDecisionStrategy.h>
<agrum/FMDP/SDyna/ILearningStrategy.h>
Class for assigning/browsing values to tuples of discrete variables.
const Sequence< const DiscreteVariable * > & variablesSequence() const final
Returns the sequence of DiscreteVariable of this instantiation.
Idx val(Idx i) const
Returns the current value of the variable at position i.
void setReward(double reward)
Sets the reward associated with this observation.
INLINE void setRModality(const DiscreteVariable *var, Idx modality)
Sets the modality assumed by the given variable in this observation.
INLINE void setModality(const DiscreteVariable *var, Idx modality)
Sets the modality assumed by the given variable in this observation.
void initialize()
Initializes the Sdyna instance.
ILearningStrategy * _learner_
The learner used to learn the FMDP.
Idx _lastAction_
The last performed action.
Idx _nbValueIterationStep_
The number of Value Iteration step we perform.
Instantiation lastState_
The state in which the system is before we perform a new action.
void setCurrentState(const Instantiation &currentState)
Sets last state visited to the given state.
IPlanningStrategy< double > * _planer_
The planer used to plan an optimal strategy.
FMDP< double > * fmdp_
The learnt Markovian Decision Process.
Set< Observation * > _bin_
Since SDYNA made these observation, it has to delete them on quitting.
Idx _nbObservation_
The total number of observation made so far.
IDecisionStrategy * _decider_
The decider.
std::string toString()
Returns a string describing the learnt FMDP and the current optimal policy.
void feedback(const Instantiation &originalState, const Instantiation &reachedState, Idx performedAction, double obtainedReward)
Performs a feedback on the last transition.
void makePlanning(Idx nbStep)
Starts a new planning.
Idx _observationPhaseLenght_
The number of observation we make before using again the planer.
SDYNA(ILearningStrategy *learner, IPlanningStrategy< double > *planer, IDecisionStrategy *decider, Idx observationPhaseLenght, Idx nbValueIterationStep, bool actionReward, bool verbose=true)
Constructor.
Size Idx
Type for indexes.
Idx randomValue(const Size max=2)
Returns a random Idx between 0 and max-1 included.
gum is the global namespace for all aGrUM entities
Headers of the SDyna abstract class.