72 const auto& var_names =
_database_.variableNames();
73 const std::size_t nb_vars = var_names.size();
76 for (std::size_t i = 0; i < nb_vars; ++i) {
85 const std::vector< std::string >& missing_symbols,
86 const bool induceTypes) :
90 for (
const auto& [first, second]:
_database_.betterTranslators()) {
101 const std::vector< std::string >& missing_symbols) {
106 std::size_t prior_nb_vars = prior_names.size();
108 for (
auto i = std::size_t(0); i < prior_nb_vars; ++i)
109 prior_names2col.
insert(prior_names[i], i);
115 "the a prior database has fewer variables "
116 "than the observed database")
122 const std::size_t score_nb_vars = score_names.size();
124 for (
auto i = std::size_t(0); i < score_nb_vars; ++i) {
126 mapping.
insert(i, prior_names2col[score_names[i]]);
129 "Variable " << score_names[i]
130 <<
" of the observed database does not belong to the "
136 for (
auto i = std::size_t(0); i < score_nb_vars; ++i) {
138 _database_.insertTranslator(var, mapping[i], missing_symbols);
202 const std::vector< std::string >& missing_symbols,
203 const bool induceTypes) :
374 dag2BN_ = std::move(from.dag2BN_);
378 ranges_ = std::move(from.ranges_);
391 if (
auto filename_size =
Size(filename.size()); filename_size < 4) {
393 "IBNLearner could not determine the "
394 "file type of the database '"
398 std::string extension = filename.substr(filename.size() - 4);
399 std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower);
401 if (extension !=
".csv") {
403 "IBNLearner does not support yet this type ('" << extension
411 const std::size_t nb_vars = var_names.size();
415 for (std::size_t i = 0; i < nb_vars; ++i) {
429 if (
auto filename_size =
Size(filename.size()); filename_size < 4) {
431 "IBNLearner could not determine the "
432 "file type of the database")
435 auto extension = filename.substr(filename.size() - 4);
436 std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower);
438 if (extension !=
".csv") {
444 const std::vector< std::string >& missing_symbols) {
451 const std::size_t nb_vars = var_names.size();
455 for (std::size_t i = 0; i < nb_vars; ++i) {
520 if (old_score !=
nullptr)
delete old_score;
528 bool take_into_account_score) {
534 if (take_into_account_score && (
score_ !=
nullptr)) {
552 "IBNLearner does not support " <<
"yet this parameter estimator")
562 return param_estimator;
609 if (possible_edges.
empty()) {
611 for (
NodeId j = 0; j < i; ++j) {
617 for (
const auto& edge: possible_edges) {
618 mgraph.
addEdge(edge.first(), edge.second());
628 for (
const auto& arc: mandatory_arcs) {
629 mandatoryGraph.
addArc(arc.tail(), arc.head());
630 forbiddenGraph.
addArc(arc.head(), arc.tail());   // forbid the reverse (head -> tail) of the mandatory arc, not a self-loop on head
636 for (
const auto& arc: forbidden_arcs) {
637 forbiddenGraph.
addArc(arc.tail(), arc.head());
643 for (
const auto& [n1, r1]: sliceOrder) {
644 for (
const auto& [n2, r2]: copyOrder) {
646 forbiddenGraph.
addArc(n1, n2);
648 }
else if (r2 > r1) {
649 forbiddenGraph.
addArc(n2, n1);
657 for (
const auto node2: mgraph.
nodes()) {
658 if (node != node2) { forbiddenGraph.
addArc(node2, node); }
663 for (
const auto node2: mgraph.
nodes()) {
664 if (node != node2) { forbiddenGraph.
addArc(node, node2); }
671 algoMiic_.setMandatoryGraph(mandatoryGraph);
672 algoMiic_.setForbiddenGraph(forbiddenGraph);
686 "Score-based algorithms do not build PDAG. Please use a constraint-based "
692 "For the moment, the BNLearner is unable to learn "
693 <<
"structures with missing values in databases")
724 "The BNLearner's corrected mutual information class does "
725 <<
"not implement yet this correction : " <<
int(
kmodeMiic_))
736 "For the moment, the BNLearner is unable to cope "
737 "with missing values in databases")
748 init_graph.
addArc(arc.tail(), arc.head());
860 const ArcSet& mandatory_arcs
863 bool order_compatible =
true;
865 for (
const auto& arc: mandatory_arcs) {
866 if (order.
pos(arc.tail()) >= order.
pos(arc.head())) {
867 order_compatible =
false;
872 if (order_compatible) {
879 return algoK2_.learnStructure(selector, init_graph);
887 return algoK2_.learnStructure(selector, init_graph);
894 "the learnDAG method has not been implemented for this "
895 "learning algorithm")
918 default :
return "IBNLearner does not support yet this score";
923 std::pair< std::size_t, std::size_t >
925 const std::size_t k_fold) {
928 if (learning_fold >= k_fold) {
930 "In " << k_fold <<
"-fold cross validation, the learning "
931 <<
"fold should be strictly lower than " << k_fold
932 <<
" but, here, it is equal to " << learning_fold)
935 const std::size_t db_size =
scoreDatabase_.databaseTable().nbRows();
936 if (k_fold >= db_size) {
938 "In " << k_fold <<
"-fold cross validation, the database's "
939 <<
"size should be strictly greater than " << k_fold
940 <<
" but, here, the database has only " << db_size <<
" rows")   // leading space keeps the row count and the word "rows" apart in the message
944 const std::size_t foldSize = db_size / k_fold;
945 const std::size_t unfold_deb = learning_fold * foldSize;
946 const std::size_t unfold_end = unfold_deb + foldSize;
949 if (learning_fold == std::size_t(0)) {
950 ranges_.push_back(std::pair< std::size_t, std::size_t >(unfold_end, db_size));
952 ranges_.push_back(std::pair< std::size_t, std::size_t >(std::size_t(0), unfold_deb));
954 if (learning_fold != k_fold - 1) {
955 ranges_.push_back(std::pair< std::size_t, std::size_t >(unfold_end, db_size));
959 return std::pair< std::size_t, std::size_t >(unfold_deb, unfold_end);
962 std::pair< double, double >
967 return chi2score.
statistics(id1, id2, knowing);
971 const std::string& name2,
972 const std::vector< std::string >& knowing) {
973 std::vector< NodeId > knowingIds;
974 std::transform(knowing.begin(),
976 std::back_inserter(knowingIds),
977 [
this](
const std::string& c) { return this->idFromName(c); });
981 std::pair< double, double >
989 const std::string& name2,
990 const std::vector< std::string >& knowing) {
991 std::vector< NodeId > knowingIds;
992 std::transform(knowing.begin(),
994 std::back_inserter(knowingIds),
995 [
this](
const std::string& c) { return this->idFromName(c); });
1000 const std::vector< NodeId >& knowing) {
1004 std::vector< NodeId > total(vars);
1005 total.insert(total.end(), knowing.begin(), knowing.end());
1007 if (knowing.size() == (
Size)0) {
1011 return LLtotal - LLknw;
1016 const std::vector< std::string >& knowing) {
1017 std::vector< NodeId > ids;
1018 std::vector< NodeId > knowingIds;
1020 auto mapper = [
this](
const std::string& c) {
return this->
idFromName(c); };
1022 std::transform(vars.begin(), vars.end(), std::back_inserter(ids), mapper);
1023 std::transform(knowing.begin(), knowing.end(), std::back_inserter(knowingIds), mapper);
1030 const std::vector< NodeId >& knowing) {
1038 case MDL : cmi.
useMDL();
break;
1040 case NML : cmi.
useNML();
break;
1046 "The BNLearner's corrected mutual information class does "
1047 <<
"not implement yet this correction : " <<
int(
kmodeMiic_))
1054 const std::string& var2,
1055 const std::vector< std::string >& knowing) {
1056 std::vector< NodeId > knowingIds;
1058 auto mapper = [
this](
const std::string& c) {
return this->
idFromName(c); };
1060 std::transform(knowing.begin(), knowing.end(), std::back_inserter(knowingIds), mapper);
1067 const std::vector< NodeId >& knowing) {
1077 const std::string& var2,
1078 const std::vector< std::string >& knowing) {
1079 std::vector< NodeId > knowingIds;
1081 auto mapper = [
this](
const std::string& c) {
return this->
idFromName(c); };
1083 std::transform(knowing.begin(), knowing.end(), std::back_inserter(knowingIds), mapper);
1092 return score_->score(var, knowing);
1096 auto mapper = [
this](
const std::string& c) {
return this->
idFromName(c); };
1099 std::vector< NodeId > knowingIds;
1100 knowingIds.reserve(knowing.size());
1101 std::transform(knowing.begin(), knowing.end(), std::back_inserter(knowingIds), mapper);
1103 return score(
id, knowingIds);
1109 "BNLearner cannot compute pseudo-counts with missing values in the database")
1118 return count.
get(vars);
1122 std::vector< NodeId > ids;
1124 auto mapper = [
this](
const std::string& c) {
return this->
idFromName(c); };
1126 std::transform(vars.begin(), vars.end(), std::back_inserter(ids), mapper);
1132 const std::vector< std::pair< std::size_t, std::size_t > >& new_ranges) {
1135 score.setRanges(new_ranges);
A listener that allows BNLearner to be used as a proxy for its inner algorithms.
A class for generic framework of learning algorithms that can easily be used.
A pack of learning algorithms that can easily be used.
virtual void eraseArc(const Arc &arc)
removes an arc from the ArcGraphPart
void addArc(NodeId tail, NodeId head) final
insert a new arc into the directed graph
Base class for all oriented graphs.
virtual void addArc(const NodeId tail, const NodeId head)
insert a new arc into the directed graph
Base class for all aGrUM's exceptions.
The class for generic Hash Tables.
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
IApproximationSchemeConfiguration()
Class constructors.
Exception: at least one argument passed to a function is not what was expected.
Error: The database contains some missing values.
Error: A name of variable is not found in the database.
Base class for mixed graphs.
const NodeGraphPart & nodes() const
return *this as a NodeGraphPart
bool exists(const NodeId id) const
alias for existsNode
virtual void addNodeWithId(const NodeId id)
try to insert a node with the given id
Exception : there is something wrong with an implementation.
Exception : operation not allowed.
Exception : out of bound.
Base class for partially directed acyclic graphs.
Idx pos(const Key &key) const
bool empty() const noexcept
Indicates whether the set is the empty set.
aGrUM's Tensor is a multi-dimensional array with tensor operators.
bool isGumNumberOfThreadsOverriden() const
indicates whether the class containing this ThreadNumberManager set its own number of threads
virtual Size getNumberOfThreads() const
returns the current max number of threads used by the class containing this ThreadNumberManager
ThreadNumberManager(Size nb_threads=0)
default constructor
ThreadNumberManager & operator=(const ThreadNumberManager &from)
copy operator
void addEdge(NodeId first, NodeId second) override
insert a new edge into the undirected graph
Base class for every random variable.
A class that redirects gum_signal from algorithms to the listeners of BNLearn.
The class for initializing DatabaseTable and RawDatabaseTable instances from CSV files.
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
The class used to pack sets of generators.
The databases' cell translators for labelized variables.
the class for packing together the translators used to preprocess the datasets
std::size_t insertTranslator(const DBTranslator &translator, const std::size_t column, const bool unique_column=true)
inserts a new translator at the end of the translator set
The class representing a tabular database as used by learning tasks.
void setVariableNames(const std::vector< std::string > &names, const bool from_external_object=true) override
sets the names of the variables
const Variable & variable(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth variable of the database table or the first one corresponding to the kth colum...
The basic class for computing the next graph changes possible in a structure learning algorithm.
The basic class for computing the next graph changes possible in a structure learning algorithm.
The mechanism to compute the next available graph changes for directed structure learning search algor...
a helper to easily read databases
Database(const std::string &file, const std::vector< std::string > &missing_symbols, const bool induceTypes=false)
default constructor
const DatabaseTable & databaseTable() const
returns the internal database table
std::vector< std::size_t > _domain_sizes_
the domain sizes of the variables (useful to speed-up computations)
DatabaseTable _database_
the database itself
Bijection< NodeId, std::size_t > _nodeId2cols_
a bijection assigning to each NodeId its column in the database
const Bijection< NodeId, std::size_t > & nodeId2Columns() const
returns the mapping between node ids and their columns in the database
Database & operator=(const Database &from)
copy operator
DBRowGeneratorParser * _parser_
the parser used for reading the database
StructuralConstraintPossibleEdges constraintPossibleEdges_
the constraint on possible Edges
StructuralConstraintNoParentNodes constraintNoParentNodes_
the constraint on no parent nodes
BNLearnerPriorType priorType_
the prior selected for the score and parameters
IBNLearner(const std::string &filename, const std::vector< std::string > &missingSymbols, bool induceTypes=true)
read the database file for the score / parameter estimation and var names
std::string priorDbname_
the filename for the Dirichlet a priori, if any
double priorWeight_
the weight of the prior
double noiseEM_
the noise factor (in (0,1)) used by EM for perturbing the CPT during init
std::vector< std::pair< std::size_t, std::size_t > > ranges_
the set of rows' ranges within the database in which learning is done
std::string checkScorePriorCompatibility() const
checks whether the current score and prior are compatible
@ LOCAL_SEARCH_WITH_TABU_LIST
virtual void createPrior_()=0
create the prior used for learning
static DatabaseTable readFile_(const std::string &filename, const std::vector< std::string > &missing_symbols)
reads a file and returns a DatabaseTable
K2 algoK2_
the K2 algorithm
AlgoType selectedAlgo_
the selected learning algorithm
const std::vector< std::pair< std::size_t, std::size_t > > & databaseRanges() const
returns the current database rows' ranges used for learning
double logLikelihood(const std::vector< NodeId > &vars, const std::vector< NodeId > &knowing={})
Return the loglikelihood of vars in the base, conditioned by knowing for the BNLearner.
Database scoreDatabase_
the database to be used by the scores and parameter estimators
ScoreType
an enumeration enabling to select easily the score we wish to use
bool useEM_
a Boolean indicating whether we should use EM for parameter learning or not
DAG2BNLearner dag2BN_
the parametric EM
Prior * prior_
the prior used
void useDatabaseRanges(const std::vector< std::pair< std::size_t, std::size_t > > &new_ranges)
use a new set of database rows' ranges to perform learning
virtual ~IBNLearner()
destructor
std::pair< double, double > chi2(NodeId id1, NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for chi2 test in the database.
CorrectedMutualInformation * mutualInfo_
the selected correction for miic
@ DIRICHLET_FROM_DATABASE
bool isConstraintBased() const
indicate if the selected algorithm is constraint-based
Size nbDecreasingChanges_
StructuralConstraintNoChildrenNodes constraintNoChildrenNodes_
the constraint on no children nodes
ParamEstimatorType paramEstimatorType_
the type of the parameter estimator
ScoreType scoreType_
the score selected for learning
std::pair< double, double > G2(NodeId id1, NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for the G2 test in the database.
const ApproximationScheme * currentAlgorithm_
DAG learnDag_()
returns the DAG learnt
Database * priorDatabase_
the database used by the Dirichlet a priori
double mutualInformation(NodeId id1, NodeId id2, const std::vector< NodeId > &knowing={})
Return the mutual information of id1 and id2 in the base, conditioned by knowing for the BNLearner.
void createScore_()
create the score used for learning
PriorType getPriorType_() const
returns the type (as a string) of a given prior
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
StructuralConstraintIndegree constraintIndegree_
the constraint for indegrees
PDAG learnPDAG()
learn a partial structure from a file (must have read the db before and must have selected miic)
std::string filename_
the filename database
bool hasMissingValues() const
returns true if the learner's database has missing values
SimpleMiic algoSimpleMiic_
the MIIC algorithm
Score * score_
the score used
StructuralConstraintMandatoryArcs constraintMandatoryArcs_
the constraint on mandatory arcs
Miic algoMiic_
the Constraint MIIC algorithm
void createCorrectedMutualInformation_()
create the Corrected Mutual Information instance for Miic
StructuralConstraintForbiddenArcs constraintForbiddenArcs_
the constraint on forbidden arcs
GreedyHillClimbing greedyHillClimbing_
the greedy hill climbing algorithm
DAG learnDAG()
learn a structure from a file (must have read the db before)
double score(NodeId vars, const std::vector< NodeId > &knowing={})
Return the value of the score currently in use by the BNLearner of a variable given a set of other va...
StructuralConstraintTabuList constraintTabuList_
the constraint for tabu lists
DAG initialDag_
an initial DAG given to learners
IBNLearner & operator=(const IBNLearner &)
copy operator
static void isCSVFileName_(const std::string &filename)
checks whether the extension of a CSV filename is correct
MixedGraph prepareMiic_()
prepares the initial graph for miic
LocalSearchWithTabuList localSearchWithTabuList_
the local search with tabu list algorithm
std::pair< std::size_t, std::size_t > useCrossValidationFold(const std::size_t learning_fold, const std::size_t k_fold)
sets the ranges of rows to be used for cross-validation learning
ParamEstimator * createParamEstimator_(const DBRowGeneratorParser &parser, bool take_into_account_score=true)
create the parameter estimator used for learning
StructuralConstraintSliceOrder constraintSliceOrder_
the constraint for 2TBNs
const DatabaseTable & database() const
returns the database used by the BNLearner
std::vector< double > rawPseudoCount(const std::vector< NodeId > &vars)
Return the pseudo-counts of NodeIds vars in the base in a raw array.
bool inducedTypes_
the policy for typing variables
CorrectedMutualInformation::KModeTypes kmodeMiic_
the penalty used in MIIC
double correctedMutualInformation(NodeId id1, NodeId id2, const std::vector< NodeId > &knowing={})
Return the corrected mutual information of id1 and id2 in the base, conditioned by knowing for the BNLearner.
void fillDatabase(DATABASE &database, const bool retry_insertion=false)
fills the rows of the database table
const std::vector< std::string > & variableNames()
returns the names of the variables in the input dataset
const DBVector< std::string > & variableNames() const noexcept
returns the variable names for all the columns of the database
std::size_t nbVariables() const noexcept
returns the number of variables (columns) of the database
A class for storing a pair of sets of NodeIds, the second one corresponding to a conditional set.
the class for computing Chi2 independence test scores
std::pair< double, double > statistics(NodeId var1, NodeId var2, const std::vector< NodeId > &rhs_ids={})
get the pair <chi2 statistic,pvalue> for a test var1 indep var2 given rhs_ids
the class for computing G2 independence test scores
std::pair< double, double > statistics(NodeId var1, NodeId var2, const std::vector< NodeId > &rhs_ids={})
get the pair <G2statistic,pvalue> for a test var1 indep var2 given rhs_ids
the no-prior class: corresponds to a 0-weight sample
The class for estimating parameters of CPTs using Maximum Likelihood.
The base class for estimating parameters of CPTs.
void setRanges(const std::vector< std::pair< std::size_t, std::size_t > > &new_ranges)
sets new ranges to perform the counts used by the parameter estimator
virtual void setNumberOfThreads(Size nb)
sets the maximum number of threads that can be used
The class for giving access to pseudo count : count in the database + prior.
std::vector< double > get(const std::vector< NodeId > &ids)
returns the pseudo-count of a pair of nodes given some other nodes
the class for computing AIC scores
virtual std::string isPriorCompatible() const final
indicates whether the prior is compatible (meaningful) with the score
the class for computing Bayesian Dirichlet (BD) log2 scores
virtual std::string isPriorCompatible() const final
indicates whether the prior is compatible (meaningful) with the score
the class for computing BDeu scores
std::string isPriorCompatible() const final
indicates whether the prior is compatible (meaningful) with the score
the class for computing BIC scores
virtual std::string isPriorCompatible() const final
indicates whether the prior is compatible (meaningful) with the score
the class for computing K2 scores (actually their log2 value)
virtual std::string isPriorCompatible() const final
indicates whether the prior is compatible (meaningful) with the score
the class for computing Log2-likelihood scores
double score(const IdCondSet &idset)
returns the score for a given IdCondSet
virtual std::string isPriorCompatible() const final
indicates whether the prior is compatible (meaningful) with the score
The base class for all the scores used for learning (BIC, BDeu, etc).
The base class for structural constraints imposed by DAGs.
the structural constraint for forbidding the creation of some arcs during structure learning
the class for structural constraints limiting the number of parents of nodes in a directed graph
the structural constraint indicating that some arcs shall never be removed or reversed
the structural constraint for forbidding children for some nodes
the structural constraint for forbidding parents for some nodes
the structural constraint for forbidding the creation of some arcs except those defined in the class ...
the "meta-programming" class for storing structural constraints
the structural constraint imposing a partial order over nodes
The class imposing a N-sized tabu list as a structural constraints for learning algorithms.
#define GUM_ERROR(type, msg)
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Set< Edge > EdgeSet
Some typedefs and defines for shortcuts ...
Size NodeId
Type for node ids.
Set< Arc > ArcSet
Some typedefs and defines for shortcuts ...
HashTable< NodeId, VAL > NodeProperty
Property on graph elements.
the class for computing Chi2 scores
the class for computing G2 scores
include the inlined functions if necessary
DatabaseTable readFile(const std::string &filename)
the base class for all the independence tests used for learning
the class for computing Log2-likelihood scores