74 if (
_database_.nbRows() == std::size_t(0))
return;
82 const auto cols =
_database_.columnsFromVariableName(var_name);
86 "Variable " << var_name <<
" could not be found in the database")
96 "Variable of Id " <<
id <<
" could not be found in the database")
260 if ((noise < 0.0) || (noise > 1.0))
283 if ((noise < 0.0) || (noise > 1.0))
300 else GUM_ERROR(
NotFound,
"EM is currently forbidden. Please enable it with useEM()")
312 else return "EM is currently forbidden. Please enable it with useEM()";
463 for (
const auto& slice: slices) {
464 for (
const auto& name: slice) {
528 "IBNLearner getPriorType does "
529 "not support yet this prior")
552 INLINE
const std::vector< std::pair< std::size_t, std::size_t > >&
A class for generic framework of learning algorithms that can easily be used.
The base class for all directed edges.
const EdgeSet & edges() const
returns the set of edges stored within the EdgeGraphPart
The base class for all undirected edges.
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
ApproximationSchemeSTATE
The different states of an approximation scheme.
Error: A name of variable is not found in the database.
Exception : the element we looked for cannot be found.
Exception : operation not allowed.
Exception : out of bound.
virtual void setNumberOfThreads(Size nb)
sets the maximum number of threads to be used by the class containing this ThreadNumberManager
Base class for undirected graphs.
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
The class representing a tabular database as used by learning tasks.
A class for parameterizing EM's parameter learning approximations.
const std::vector< std::string > & missingSymbols() const
returns the set of missing symbols taken into account
const DatabaseTable & databaseTable() const
returns the internal database table
std::size_t size() const
returns the number of records in the database
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
std::vector< std::size_t > _domain_sizes_
the domain sizes of the variables (useful to speed-up computations)
DatabaseTable _database_
the database itself
const std::string & nameFromId(NodeId id) const
returns the variable name corresponding to a given node id
double weight(const std::size_t i) const
returns the weight of the ith record
Bijection< NodeId, std::size_t > _nodeId2cols_
a bijection assigning to each NodeId the index of its column in the database
const std::vector< std::string > & names() const
returns the names of the variables in the database
void setWeight(const std::size_t i, const double weight)
sets the weight of the ith record
const Bijection< NodeId, std::size_t > & nodeId2Columns() const
returns the mapping between node ids and their columns in the database
DBRowGeneratorParser & parser()
returns the parser for the database
DBRowGeneratorParser * _parser_
the parser used for reading the database
void setDatabaseWeight(const double new_weight)
assign a weight to all the rows of the database so that the sum of their weights is equal to new_weig...
std::size_t nbRows() const
returns the number of records in the database
const std::vector< std::size_t > & domainSizes() const
returns the domain sizes of the variables
double weight() const
returns the weight of the whole database
StructuralConstraintPossibleEdges constraintPossibleEdges_
the constraint on possible Edges
StructuralConstraintNoParentNodes constraintNoParentNodes_
the constraint on no parent nodes
void eraseNoChildrenNode(NodeId node)
double recordWeight(const std::size_t i) const
returns the weight of the ith record
BNLearnerPriorType priorType_
the prior selected for the score and parameters
const std::vector< std::size_t > & domainSizes() const
returns the domain sizes of the variables in the database
void useGreedyHillClimbing()
indicate that we wish to use a greedy hill climbing algorithm
void useScoreBDeu()
indicate that we wish to use a BDeu score
void addNoParentNode(NodeId node)
void setSliceOrder(const NodeProperty< NodeId > &slice_order)
sets a partial order on the nodes
bool isUsingEM() const
indicates whether we use EM for parameter learning
void setForbiddenArcs(const ArcSet &set)
assign a set of forbidden arcs
std::string priorDbname_
the filename for the Dirichlet a priori, if any
double priorWeight_
the weight of the prior
double noiseEM_
the noise factor (in (0,1)) used by EM for perturbing the CPT during init
std::vector< std::pair< std::size_t, std::size_t > > ranges_
the set of rows' ranges within the database in which learning is done
void setDatabaseWeight(const double new_weight)
assign a weight to all the rows of the learning database so that the sum of their weights is equal to...
std::vector< Arc > latentVariables() const
get the list of arcs hiding latent variables
void clearDatabaseRanges()
reset the ranges to the one range corresponding to the whole database
std::string checkScorePriorCompatibility() const
checks whether the current score and prior are compatible
void useBDeuPrior(double weight=1.0)
use the BDeu prior
void setMandatoryArcs(const ArcSet &set)
assign a set of mandatory arcs
@ LOCAL_SEARCH_WITH_TABU_LIST
ApproximationSchemeSTATE EMState() const
returns the state of the last EM algorithm executed
const std::string & nameFromId(NodeId id) const
returns the variable name corresponding to a given node id
double databaseWeight() const
returns the weight of the whole database
K2 algoK2_
the K2 algorithm
void addMandatoryArc(const Arc &arc)
AlgoType selectedAlgo_
the selected learning algorithm
const std::vector< std::pair< std::size_t, std::size_t > > & databaseRanges() const
returns the current database rows' ranges used for learning
void setMaxIndegree(Size max_indegree)
sets the max indegree
void addPossibleEdge(const Edge &edge)
void setInitialDAG(const DAG &)
sets an initial DAG structure
void useK2(const Sequence< NodeId > &order)
indicate that we wish to use K2
Database scoreDatabase_
the database to be used by the scores and parameter estimators
bool useEM_
a Boolean indicating whether we should use EM for parameter learning or not
DAG2BNLearner dag2BN_
the parametric EM
BNLearnerPriorType
an enumeration to select the prior
@ DIRICHLET_FROM_BAYESNET
@ DIRICHLET_FROM_DATABASE
void erasePossibleEdge(const Edge &edge)
void setNumberOfThreads(Size nb) override
sets the maximum number of threads that can be used
void useScoreBIC()
indicate that we wish to use a BIC score
Size nbDecreasingChanges_
StructuralConstraintNoChildrenNodes constraintNoChildrenNodes_
the constraint on no children nodes
DAG initialDAG()
returns the initial DAG structure
void setPossibleEdges(const EdgeSet &set)
assign a set of possible edges
void useNoPrior()
use no prior
ScoreType scoreType_
the score selected for learning
void eraseForbiddenArc(const Arc &arc)
void useSmoothingPrior(double weight=1)
use the prior smoothing
PriorType getPriorType_() const
returns the type (as a string) of a given prior
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
void useLocalSearchWithTabuList(Size tabu_size=100, Size nb_decrease=2)
indicate that we wish to use a local search with tabu list
void useScoreK2()
indicate that we wish to use a K2 score
StructuralConstraintIndegree constraintIndegree_
the constraint for indegrees
void _setPriorWeight_(double weight)
sets the prior weight
void setPossibleSkeleton(const UndiGraph &skeleton)
assign a set of possible edges
void useEMWithRateCriterion(const double epsilon, const double noise=default_EM_noise)
use The EM algorithm to learn parameters with the rate stopping criterion
void useNMLCorrection()
indicate that we wish to use the NML correction for MIIC
void useEM(const double epsilon, const double noise=default_EM_noise)
use The EM algorithm to learn parameters
void useEMWithDiffCriterion(const double epsilon, const double noise=default_EM_noise)
use The EM algorithm to learn parameters with the diff stopping criterion
bool hasMissingValues() const
returns true if the learner's database has missing values
void forbidEM()
prevent using the EM algorithm for parameter learning
double epsilon() const override
Get the value of epsilon.
Score * score_
the score used
StructuralConstraintMandatoryArcs constraintMandatoryArcs_
the constraint on mandatory arcs
Miic algoMiic_
the Constraint MIIC algorithm
EMApproximationScheme & EM()
returns the EM parameter learning approximation scheme if EM is enabled
void useNoCorrection()
indicate that we wish to use the NoCorr correction for MIIC
StructuralConstraintForbiddenArcs constraintForbiddenArcs_
the constraint on forbidden arcs
void useScoreLog2Likelihood()
indicate that we wish to use a Log2Likelihood score
void setRecordWeight(const std::size_t i, const double weight)
sets the weight of the ith record of the database
void useMDLCorrection()
indicate that we wish to use the MDL correction for MIIC
void useDirichletPrior(const std::string &filename, double weight=1)
use the Dirichlet prior from a database
StructuralConstraintTabuList constraintTabuList_
the constraint for tabu lists
void addForbiddenArc(const Arc &arc)
DAG initialDag_
an initial DAG given to learners
void addNoChildrenNode(NodeId node)
Size domainSize(NodeId var) const
returns the domain size of the variable with the given node id
void useScoreAIC()
indicate that we wish to use an AIC score
const std::vector< std::string > & names() const
returns the names of the variables in the database
void eraseMandatoryArc(const Arc &arc)
void useMIIC()
indicate that we wish to use MIIC
LocalSearchWithTabuList localSearchWithTabuList_
the local search with tabu list algorithm
StructuralConstraintSliceOrder constraintSliceOrder_
the constraint for 2TBNs
const DatabaseTable & database() const
returns the database used by the BNLearner
void eraseNoParentNode(NodeId node)
CorrectedMutualInformation::KModeTypes kmodeMiic_
the penalty used in MIIC
void useScoreBD()
indicate that we wish to use a BD score
std::string EMStateMessage() const
returns the state of the EM algorithm
the structural constraint imposing a partial order over nodes
#define GUM_ERROR(type, msg)
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Set< Edge > EdgeSet
Some typedefs and defines for shortcuts ...
Size NodeId
Type for node ids.
Set< Arc > ArcSet
Some typedefs and defines for shortcuts ...
HashTable< NodeId, VAL > NodeProperty
Property on graph elements.
include the inlined functions if necessary
Base classes for undirected graphs.