aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
structuredPlaner_tpl.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40#pragma once
41
42
50
51// =========================================================================
52#include <queue>
53#include <vector>
54// #include <algorithm>
55// #include <utility>
56// =========================================================================
58
60// =========================================================================
64// =========================================================================
66// =========================================================================
67
69#define RECAST(x) reinterpret_cast< const MultiDimFunctionGraph< GUM_SCALAR >* >(x)
70
71namespace gum {
72
73
74 /* **************************************************************************************************
75 * **/
76 /* ** **/
77 /* ** Constructors / Destructors **/
78 /* ** **/
79 /* **************************************************************************************************
80 * **/
81
82 // ===========================================================================
83 // Default constructor
84 // ===========================================================================
85 template < typename GUM_SCALAR >
87 GUM_SCALAR discountFactor,
88 GUM_SCALAR epsilon,
89 bool verbose) :
90 discountFactor_(discountFactor), operator_(opi), verbose_(verbose) {
91 GUM_CONSTRUCTOR(StructuredPlaner);
92
93 _threshold_ = epsilon;
94 vFunction_ = nullptr;
95 optimalPolicy_ = nullptr;
96 }
97
98 // ===========================================================================
99 // Default destructor
100 // ===========================================================================
101 template < typename GUM_SCALAR >
103 GUM_DESTRUCTOR(StructuredPlaner);
104
105 if (vFunction_) { delete vFunction_; }
106
107 if (optimalPolicy_) delete optimalPolicy_;
108
109 delete operator_;
110 }
111
112 /* **************************************************************************************************
113 * **/
114 /* ** **/
115 /* ** Datastructure access methods **/
116 /* ** **/
117 /* **************************************************************************************************
118 * **/
119
120 // ===========================================================================
121 // Initializes data structure needed for making the planning
122 // ===========================================================================
123 template < typename GUM_SCALAR >
125 // ************************************************************************
126 // Discarding the case where no \pi* have been computed
127 if (!optimalPolicy_ || optimalPolicy_->root() == 0) return "NO OPTIMAL POLICY CALCULATED YET";
128
129 // ************************************************************************
130 // Initialisation
131
132 // Declaration of the needed string stream
133 std::stringstream output;
134 std::stringstream terminalStream;
135 std::stringstream nonTerminalStream;
136 std::stringstream arcstream;
137
138 // First line for the toDot
139 output << std::endl << "digraph \" OPTIMAL POLICY \" {" << std::endl;
140
141 // Form line for the internal node stream en the terminal node stream
142 terminalStream << "node [shape = box];" << std::endl;
143 nonTerminalStream << "node [shape = ellipse];" << std::endl;
144
145 // For somme clarity in the final string
146 std::string tab = "\t";
147
148 // To know if we already checked a node or not
149 Set< NodeId > visited;
150
151 // FIFO of nodes to visit
152 std::queue< NodeId > fifo;
153
154 // Loading the FIFO
155 fifo.push(optimalPolicy_->root());
156 visited << optimalPolicy_->root();
157
158
159 // ************************************************************************
160 // Main loop
161 while (!fifo.empty()) {
162 // Node to visit
163 NodeId currentNodeId = fifo.front();
164 fifo.pop();
165
166 // Checking if it is terminal
167 if (optimalPolicy_->isTerminalNode(currentNodeId)) {
168 // Get back the associated ActionSet
169 ActionSet ase = optimalPolicy_->nodeValue(currentNodeId);
170
171 // Creating a line for this node
172 terminalStream << tab << currentNodeId << ";" << tab << currentNodeId << " [label=\""
173 << currentNodeId << " - ";
174
175 // Enumerating and adding to the line the associated optimal actions
176 for (SequenceIteratorSafe< Idx > valIter = ase.beginSafe(); valIter != ase.endSafe();
177 ++valIter)
178 terminalStream << fmdp_->actionName(*valIter) << " ";
180 // Terminating line
181 terminalStream << "\"];" << std::endl;
182 continue;
183 }
184
185 // Either wise
186 {
187 // Geting back the associated internal node
188 const InternalNode* currentNode = optimalPolicy_->node(currentNodeId);
189
190 // Creating a line in internalnode stream for this node
191 nonTerminalStream << tab << currentNodeId << ";" << tab << currentNodeId << " [label=\""
192 << currentNodeId << " - " << currentNode->nodeVar()->name() << "\"];"
193 << std::endl;
194
195 // Going through the sons and agregating them according the the sons Ids
197 for (Idx sonIter = 0; sonIter < currentNode->nbSons(); ++sonIter) {
198 if (!visited.exists(currentNode->son(sonIter))) {
199 fifo.push(currentNode->son(sonIter));
200 visited << currentNode->son(sonIter);
201 }
202 if (!sonMap.exists(currentNode->son(sonIter)))
203 sonMap.insert(currentNode->son(sonIter), new LinkedList< Idx >());
204 sonMap[currentNode->son(sonIter)]->addLink(sonIter);
205 }
206
207 // Adding to the arc stram
208 for (auto sonIter = sonMap.beginSafe(); sonIter != sonMap.endSafe(); ++sonIter) {
209 arcstream << tab << currentNodeId << " -> " << sonIter.key() << " [label=\" ";
210 Link< Idx >* modaIter = sonIter.val()->list();
211 while (modaIter) {
212 arcstream << currentNode->nodeVar()->label(modaIter->element());
213 if (modaIter->nextLink()) arcstream << ", ";
214 modaIter = modaIter->nextLink();
215 }
216 arcstream << "\",color=\"#00ff00\"];" << std::endl;
217 delete sonIter.val();
218 }
219 }
220 }
221
222 // Terminating
223 output << terminalStream.str() << std::endl
224 << nonTerminalStream.str() << std::endl
225 << arcstream.str() << std::endl
226 << "}" << std::endl;
227
228 return output.str();
230
231 /* **************************************************************************************************
232 * **/
233 /* ** **/
234 /* ** Planning Methods **/
235 /* ** **/
236 /* **************************************************************************************************
237 * **/
238
239 // ===========================================================================
240 // Initializes data structure needed for making the planning
241 // ===========================================================================
242 template < typename GUM_SCALAR >
244 fmdp_ = fmdp;
245
246 // Determination of the threshold value
248
249 // Establishement of sequence of variable elemination
250 for (auto varIter = fmdp_->beginVariables(); varIter != fmdp_->endVariables(); ++varIter)
251 elVarSeq_ << fmdp_->main2prime(*varIter);
252
253 // Initialisation of the value function
254 vFunction_ = operator_->getFunctionInstance();
255 optimalPolicy_ = operator_->getAggregatorInstance();
257 }
258
259 // ===========================================================================
260 // Performs a value iteration
261 // ===========================================================================
262 template < typename GUM_SCALAR >
264 if (_firstTime_) {
265 this->initVFunction_();
266 _firstTime_ = false;
267 }
268
269 // *****************************************************************************************
270 // Main loop
271 // *****************************************************************************************
272 Idx nbIte = 0;
273 GUM_SCALAR gap = _threshold_ + 1;
274 while ((gap > _threshold_) && (nbIte < nbStep)) {
275 ++nbIte;
276
278
279 // *****************************************************************************************
280 // Then we compare new value function and the old one
281 MultiDimFunctionGraph< GUM_SCALAR >* deltaV = operator_->subtract(newVFunction, vFunction_);
282 gap = 0;
283
284 for (deltaV->beginValues(); deltaV->hasValue(); deltaV->nextValue())
285 if (gap < fabs(deltaV->value())) gap = fabs(deltaV->value());
286 delete deltaV;
287
288 if (verbose_)
289 std::cout << " ------------------- Fin itération n° " << nbIte << std::endl
290 << " Gap : " << gap << " - " << _threshold_ << std::endl;
292 // *****************************************************************************************
293 // And eventually we update pointers for next loop
294 delete vFunction_;
295 vFunction_ = newVFunction;
296 }
297
298 // *****************************************************************************************
299 // Policy matching value function research
300 // *****************************************************************************************
301 this->evalPolicy_();
302 }
303
304 // ===========================================================================
305 // Performs a single step of value iteration
306 // ===========================================================================
307 template < typename GUM_SCALAR >
311
312 /* **************************************************************************************************
313 * **/
314 /* ** **/
315 /* ** Value Iteration Methods **/
316 /* ** **/
317 /* **************************************************************************************************
318 * **/
319
320
321 // ===========================================================================
322 // Performs a single step of value iteration
323 // ===========================================================================
324 template < typename GUM_SCALAR >
326 // *****************************************************************************************
327 // Loop reset
328 MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = operator_->getFunctionInstance();
329 newVFunction->copyAndReassign(*vFunction_, fmdp_->mapMainPrime());
330
331 // *****************************************************************************************
332 // For each action
333 std::vector< MultiDimFunctionGraph< GUM_SCALAR >* > qActionsSet;
334 for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter) {
335 MultiDimFunctionGraph< GUM_SCALAR >* qAction = this->evalQaction_(newVFunction, *actionIter);
336 qActionsSet.push_back(qAction);
337 }
338 delete newVFunction;
339
340 // *****************************************************************************************
341 // Next to evaluate main value function, we take maximise over all action
342 // value, ...
343 newVFunction = this->maximiseQactions_(qActionsSet);
344
345 // *******************************************************************************************
346 // Next, we eval the new function value
347 newVFunction = this->addReward_(newVFunction);
348
349 return newVFunction;
350 }
351
352 // ===========================================================================
353 // Evals the q function for current fmdp action
354 // ===========================================================================
355 template < typename GUM_SCALAR >
356 MultiDimFunctionGraph< GUM_SCALAR >*
358 Idx actionId) {
359 // ******************************************************************************
360 // Initialisation :
361 // Creating a copy of last Vfunction to deduce from the new Qaction
362 // And finding the first var to eleminate (the one at the end)
363
364 return operator_->regress(Vold, actionId, this->fmdp_, this->elVarSeq_);
365 }
366
367 // ===========================================================================
368 // Maximise the AAction to iobtain the vFunction
369 // ===========================================================================
370 template < typename GUM_SCALAR >
372 std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >& qActionsSet) {
373 MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = qActionsSet.back();
374 qActionsSet.pop_back();
375
376 while (!qActionsSet.empty()) {
377 MultiDimFunctionGraph< GUM_SCALAR >* qAction = qActionsSet.back();
378 qActionsSet.pop_back();
379 newVFunction = operator_->maximize(newVFunction, qAction);
380 }
381
382 return newVFunction;
383 }
384
385 // ===========================================================================
386 // Maximise the AAction to iobtain the vFunction
387 // ===========================================================================
388 template < typename GUM_SCALAR >
390 std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >& qActionsSet) {
391 MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = qActionsSet.back();
392 qActionsSet.pop_back();
393
394 while (!qActionsSet.empty()) {
395 MultiDimFunctionGraph< GUM_SCALAR >* qAction = qActionsSet.back();
396 qActionsSet.pop_back();
397 newVFunction = operator_->minimize(newVFunction, qAction);
398 }
399
400 return newVFunction;
401 }
402
403 // ===========================================================================
404 // Updates the value function by multiplying by discount and adding reward
405 // ===========================================================================
406 template < typename GUM_SCALAR >
409 Idx actionId) {
410 // *****************************************************************************************
411 // ... we multiply the result by the discount factor, ...
412 MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = operator_->getFunctionInstance();
413 newVFunction->copyAndMultiplyByScalar(*Vold, this->discountFactor_);
414 delete Vold;
415
416 // *****************************************************************************************
417 // ... and finally add reward
418 newVFunction = operator_->add(newVFunction, RECAST(fmdp_->reward(actionId)));
419
420 return newVFunction;
421 }
422
423 /* **************************************************************************************************
424 * **/
425 /* ** **/
426 /* ** Optimal Policy Evaluation Methods **/
427 /* ** **/
428 /* **************************************************************************************************
429 * **/
430
431 // ===========================================================================
432 // Evals the policy corresponding to the given value function
433 // ===========================================================================
434 template < typename GUM_SCALAR >
436 // *****************************************************************************************
437 // Loop reset
438 MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = operator_->getFunctionInstance();
439 newVFunction->copyAndReassign(*vFunction_, fmdp_->mapMainPrime());
440
441 std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy >* >
442 argMaxQActionsSet;
443 // *****************************************************************************************
444 // For each action
445 for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter) {
446 MultiDimFunctionGraph< GUM_SCALAR >* qAction = this->evalQaction_(newVFunction, *actionIter);
447
448 qAction = this->addReward_(qAction);
449
450 argMaxQActionsSet.push_back(makeArgMax_(qAction, *actionIter));
451 }
452 delete newVFunction;
453
454
455 // *****************************************************************************************
456 // Next to evaluate main value function, we take maximise over all action
457 // value, ...
459 = argmaximiseQactions_(argMaxQActionsSet);
460
461 // *****************************************************************************************
462 // Next to evaluate main value function, we take maximise over all action
463 // value, ...
464 extractOptimalPolicy_(argMaxVFunction);
465 }
466
467 // ===========================================================================
468 // Creates a copy of given in parameter decision Graph and replaces leaves
469 // of that Graph by a pair containing value of the leaf and action to which
470 // is bind this Graph (given in parameter).
471 // ===========================================================================
472 template < typename GUM_SCALAR >
476 Idx actionId) {
478 = operator_->getArgMaxFunctionInstance();
479
480 // Insertion des nouvelles variables
482 = qAction->variablesSequence().beginSafe();
483 varIter != qAction->variablesSequence().endSafe();
484 ++varIter)
485 amcpy->add(**varIter);
486
488 amcpy->manager()->setRootNode(
489 _recurArgMaxCopy_(qAction->root(), actionId, qAction, amcpy, src2dest));
490
491 delete qAction;
492 return amcpy;
493 }
494
495 // ==========================================================================
496 // Recursion part for the createArgMaxCopy
497 // ==========================================================================
498 template < typename GUM_SCALAR >
500 NodeId currentNodeId,
501 Idx actionId,
504 HashTable< NodeId, NodeId >& visitedNodes) {
505 if (visitedNodes.exists(currentNodeId)) return visitedNodes[currentNodeId];
506
507 NodeId nody;
508 if (src->isTerminalNode(currentNodeId)) {
509 ArgMaxSet< GUM_SCALAR, Idx > leaf(src->nodeValue(currentNodeId), actionId);
510 nody = argMaxCpy->manager()->addTerminalNode(leaf);
511 } else {
512 const InternalNode* currentNode = src->node(currentNodeId);
513 NodeId* sonsMap = static_cast< NodeId* >(
514 SOA_ALLOCATE(sizeof(NodeId) * currentNode->nodeVar()->domainSize()));
515 for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
516 sonsMap[moda]
517 = _recurArgMaxCopy_(currentNode->son(moda), actionId, src, argMaxCpy, visitedNodes);
518 nody = argMaxCpy->manager()->addInternalNode(currentNode->nodeVar(), sonsMap);
519 }
520 visitedNodes.insert(currentNodeId, nody);
521 return nody;
522 }
523
524 // ===========================================================================
525 // Performs argmax_a Q(s,a)
526 // ===========================================================================
527 template < typename GUM_SCALAR >
531 SetTerminalNodePolicy >* >& qActionsSet) {
533 = qActionsSet.back();
534 qActionsSet.pop_back();
535
536 while (!qActionsSet.empty()) {
538 = qActionsSet.back();
539 qActionsSet.pop_back();
540 newVFunction = operator_->argmaximize(newVFunction, qAction);
541 }
542
543 return newVFunction;
544 }
545
546 // ===========================================================================
547 // Creates a copy of given in parameter decision Graph and replaces leaves
548 // of that Graph by a pair containing value of the leaf and action to which
549 // is bind this Graph (given in parameter).
550 // ===========================================================================
551 template < typename GUM_SCALAR >
554 argMaxOptimalValueFunction) {
555 optimalPolicy_->clear();
556
557 // Insertion des nouvelles variables
559 = argMaxOptimalValueFunction->variablesSequence().beginSafe();
560 varIter != argMaxOptimalValueFunction->variablesSequence().endSafe();
561 ++varIter)
562 optimalPolicy_->add(**varIter);
563
565 optimalPolicy_->manager()->setRootNode(_recurExtractOptPol_(argMaxOptimalValueFunction->root(),
566 argMaxOptimalValueFunction,
567 src2dest));
568
569 delete argMaxOptimalValueFunction;
570 }
571
572 // ==========================================================================
573 // Recursion part for the createArgMaxCopy
574 // ==========================================================================
575 template < typename GUM_SCALAR >
577 NodeId currentNodeId,
579 argMaxOptVFunc,
580 HashTable< NodeId, NodeId >& visitedNodes) {
581 if (visitedNodes.exists(currentNodeId)) return visitedNodes[currentNodeId];
582
583 NodeId nody;
584 if (argMaxOptVFunc->isTerminalNode(currentNodeId)) {
585 ActionSet leaf;
586 _transferActionIds_(argMaxOptVFunc->nodeValue(currentNodeId), leaf);
587 nody = optimalPolicy_->manager()->addTerminalNode(leaf);
588 } else {
589 const InternalNode* currentNode = argMaxOptVFunc->node(currentNodeId);
590 NodeId* sonsMap = static_cast< NodeId* >(
591 SOA_ALLOCATE(sizeof(NodeId) * currentNode->nodeVar()->domainSize()));
592 for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
593 sonsMap[moda] = _recurExtractOptPol_(currentNode->son(moda), argMaxOptVFunc, visitedNodes);
594 nody = optimalPolicy_->manager()->addInternalNode(currentNode->nodeVar(), sonsMap);
595 }
596 visitedNodes.insert(currentNodeId, nody);
597 return nody;
598 }
599
600 // ==========================================================================
601 // Extract from an ArgMaxSet the associated ActionSet
602 // ==========================================================================
603 template < typename GUM_SCALAR >
605 ActionSet& dest) {
606 for (auto idi = src.beginSafe(); idi != src.endSafe(); ++idi)
607 dest += *idi;
608 }
609
610
611} // end of namespace gum
A class to store the optimal actions.
Definition actionSet.h:105
SequenceIteratorSafe< Idx > endSafe() const
Iterator end.
Definition actionSet.h:166
SequenceIteratorSafe< Idx > beginSafe() const
Iterator beginning.
Definition actionSet.h:161
Class to handle efficiently argMaxSet.
Definition argMaxSet.h:78
SequenceIteratorSafe< GUM_SCALAR_SEQ > beginSafe() const
Iterator beginning.
Definition argMaxSet.h:129
SequenceIteratorSafe< GUM_SCALAR_SEQ > endSafe() const
Iterator end.
Definition argMaxSet.h:134
virtual std::string label(Idx i) const =0
get the indice-th label. This method is pure virtual.
virtual Size domainSize() const =0
void nextValue() const
Increments the constant safe iterator.
void beginValues() const
Initializes the constant safe iterator on terminal nodes.
bool hasValue() const
Indicates if constant safe iterator has reach end of terminal nodes list.
const GUM_SCALAR & value() const
Returns the value of the current terminal nodes pointed by the constant safe iterator.
This class is used to implement factored decision process.
Definition fmdp.h:73
The class for generic Hash Tables.
Definition hashTable.h:637
const iterator_safe & endSafe() noexcept
Returns the safe iterator pointing to the end of the hashtable.
bool exists(const Key &key) const
Checks whether there exists an element with a given key in the hashtable.
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
iterator_safe beginSafe()
Returns the safe iterator pointing to the beginning of the hashtable.
<agrum/FMDP/SDyna/IOperatorStrategy.h>
Structure used to represent a node internal structure.
const DiscreteVariable * nodeVar() const
Returns the node variable.
Idx nbSons() const
Returns the number of sons.
NodeId son(Idx modality) const
Returns the son at a given index.
Chain list allocated using the SmallObjectAllocator.
Definition link.h:155
Class implementing a function graph.
MultiDimFunctionGraphManager< GUM_SCALAR, TerminalNodePolicy > * manager()
Returns a const reference to the manager of this diagram.
const NodeId & root() const
Returns the id of the root node from the diagram.
bool isTerminalNode(const NodeId &node) const
Indicates if given node is terminal or not.
const InternalNode * node(NodeId n) const
Returns internalNode structure associated to that nodeId.
const GUM_SCALAR & nodeValue(NodeId n) const
Returns value associated to given node.
void copyAndMultiplyByScalar(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, GUM_SCALAR gamma)
Copies src diagrams and multiply every value by the given scalar.
void copyAndReassign(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, const Bijection< const DiscreteVariable *, const DiscreteVariable * > &reassign)
Copies src diagrams structure into this diagrams.
virtual void add(const DiscreteVariable &v)
Adds a new var to the variables of the multidimensional matrix.
virtual const Sequence< const DiscreteVariable * > & variablesSequence() const override
Returns a const ref to the sequence of DiscreteVariable*.
Implementation of a Terminal Node Policy that maps nodeid to a set of value.
bool exists(const Key &k) const
Indicates whether a given elements belong to the set.
Definition set_tpl.h:533
virtual void evalPolicy_()
Perform the required tasks to extract an optimal policy.
NodeId _recurExtractOptPol_(NodeId, const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
GUM_SCALAR discountFactor_
Discount Factor used for infinite horizon planning.
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)
Initializes data structure needed for making the planning.
NodeId _recurArgMaxCopy_(NodeId, Idx, const MultiDimFunctionGraph< GUM_SCALAR > *, MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
bool verbose_
Boolean used to indicate whether or not iteration information should be displayed on the terminal.
virtual ~StructuredPlaner()
Default destructor.
virtual void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
void _transferActionIds_(const ArgMaxSet< GUM_SCALAR, Idx > &, ActionSet &)
Extract from an ArgMaxSet the associated ActionSet.
virtual MultiDimFunctionGraph< GUM_SCALAR > * minimiseFunctions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs min_i F_i.
virtual MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * argmaximiseQactions_(std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * > &)
Performs argmax_a Q(s,a).
void extractOptimalPolicy_(const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *optimalValueFunction)
From V(s)* = argmax_a Q*(s,a), this function extract pi*(s) This function mainly consists in extracti...
MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * optimalPolicy_
The associated optimal policy.
virtual MultiDimFunctionGraph< GUM_SCALAR > * addReward_(MultiDimFunctionGraph< GUM_SCALAR > *function, Idx actionId=0)
Perform the R(s) + gamma . function.
IOperatorStrategy< GUM_SCALAR > * operator_
std::string optimalPolicy2String()
Provide a better toDot for the optimal policy where the leaves have the action name instead of its id...
virtual MultiDimFunctionGraph< double > * valueIteration_()
GUM_SCALAR _threshold_
The threshold value Whenever | V^{n} - V^{n+1} | < threshold, we consider that V ~ V*.
StructuredPlaner(IOperatorStrategy< GUM_SCALAR > *opi, GUM_SCALAR discountFactor, GUM_SCALAR epsilon, bool verbose)
Default constructor.
virtual void initVFunction_()
Performs a single step of value iteration.
INLINE const FMDP< GUM_SCALAR > * fmdp()
Returns a const ptr on the Factored Markov Decision Process on which we're planning.
MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * makeArgMax_(const MultiDimFunctionGraph< GUM_SCALAR > *Qaction, Idx actionId)
Creates a copy of given Qaction that can be exploit by a Argmax.
virtual MultiDimFunctionGraph< GUM_SCALAR > * maximiseQactions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs max_a Q(s,a).
MultiDimFunctionGraph< GUM_SCALAR > * vFunction_
The Value Function computed iteratively.
virtual MultiDimFunctionGraph< double > * evalQaction_(const MultiDimFunctionGraph< double > *, Idx)
const std::string & name() const
returns the name of the variable
#define RECAST(x)
Definition fmdp_tpl.h:57
This files contains several function objects that are not (yet) defined in the STL.
Size Idx
Type for indexes.
Definition types.h:79
Size NodeId
Type for node ids.
Header files of gum::Instantiation.
Useful macros for maths.
Headers of MultiDimFunctionGraph.
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
#define SOA_ALLOCATE(x)
Headers of the StructuredPlaner planer class.
Header of the Tensor class.