54 template <
typename GUM_SCALAR >
55 void DAG2BNLearner::_probaVarReordering_(gum::Tensor< GUM_SCALAR >& pot,
56 const gum::Tensor< GUM_SCALAR >& other_pot) {
59 GUM_ERROR(gum::CPTError,
"the tensors do not have the same variables")
63 Instantiation i(other_pot);
65 for (i.setFirst(); !i.end(); ++i) {
67 pot.
set(j, other_pot[i]);
72 template <
typename GUM_SCALAR >
79 template <
typename GUM_SCALAR >
80 BayesNet< GUM_SCALAR > DAG2BNLearner::_createBN_(
ParamEstimator& estimator,
82 const bool compute_log_likelihood) {
83 BayesNet< GUM_SCALAR > bn;
84 log_likelihood_EM_ = 0.0;
88 const auto& database = estimator.
database();
89 if (node2cols.empty()) {
90 for (
const auto id: dag) {
94 for (
const auto id: dag) {
95 const std::size_t col = node2cols.second(
id);
96 bn.add(
dynamic_cast< const DiscreteVariable&
>(database.variable(col)),
id);
101 bn.beginTopologyTransformation();
102 for (
const auto& arc: dag.arcs()) {
103 bn.addArc(arc.tail(), arc.head());
105 bn.endTopologyTransformation();
108 const VariableNodeMap& varmap = bn.variableNodeMap();
109 for (
const auto id: dag) {
111 auto& pot =
const_cast< Tensor< GUM_SCALAR >&
>(bn.cpt(
id));
116 std::vector< NodeId > conditioning_ids(vars.size() - 1);
117 for (
auto i = std::size_t(1); i < vars.size(); ++i) {
118 conditioning_ids[i - 1] = varmap.get(*(vars[i]));
122 += estimator.
setParameters(
id, conditioning_ids, pot, compute_log_likelihood);
129 template <
typename GUM_SCALAR >
134 bootstrap_estimator.
clear();
135 EM_estimator.
clear();
140 return _performEM_(bootstrap_estimator, EM_estimator, std::move(bn));
144 template <
typename GUM_SCALAR >
147 const BayesNet< GUM_SCALAR >& bn) {
149 bootstrap_estimator.
clear();
150 EM_estimator.
clear();
153 return createBNwithEM(bootstrap_estimator, EM_estimator, std::move(bn_copy));
157 template <
typename GUM_SCALAR >
160 BayesNet< GUM_SCALAR >&& bn) {
163 for (
const auto id: bn.dag()) {
165 auto& pot =
const_cast< Tensor< GUM_SCALAR >&
>(bn.cpt(
id));
168 bool all_zeroed =
true;
170 if (pot[inst] != 0.0) {
181 std::vector< NodeId > conditioning_ids(vars.size() - 1);
182 for (
auto i = std::size_t(1); i < vars.size(); ++i) {
183 conditioning_ids[i - 1] = varmap.
get(*(vars[i]));
187 bootstrap_estimator.
setParameters(
id, conditioning_ids, pot,
false);
191 return _performEM_(bootstrap_estimator, EM_estimator, std::move(bn));
195 template <
typename GUM_SCALAR >
196 BayesNet< GUM_SCALAR > DAG2BNLearner::_performEM_(
ParamEstimator& bootstrap_estimator,
198 BayesNet< GUM_SCALAR >&& bn) {
213 "EM cannot be executed because no stopping criterion among "
214 <<
"{min rate, min diff, max iter, max time} has been selected")
218 const DAG dag = bn.dag();
221 if (noiseEM_ != 0.0) {
222 for (
const auto& node: bn.nodes()) {
223 bn.cpt(node).noising(noiseEM_).normalizeAsCPT();
232 log_likelihood_EM_ = 0.0;
233 const VariableNodeMap& varmap = bn.variableNodeMap();
235 for (
const auto& node: bn.nodes()) {
238 const auto& pot =
const_cast< Tensor< GUM_SCALAR >&
>(bn.cpt(node));
240 std::vector< NodeId > conditioning_ids(vars.size() - 1);
241 for (
auto i = std::size_t(1); i < vars.size(); ++i) {
242 conditioning_ids[i - 1] = varmap.get(*(vars[i]));
246 IdCondSet idset(node, conditioning_ids,
true);
248 Instantiation inst(pot);
249 for (std::size_t k = 0, end = pot.
domainSize(); k < end; ++k, inst.inc()) {
250 if (N_ijk[k]) { log_likelihood_EM_ += N_ijk[k] * std::log(pot[inst]); }
253 double current_log_likelihood = log_likelihood_EM_;
261 BayesNet< GUM_SCALAR > best_bn;
262 bool must_return_best_bn =
false;
263 unsigned int nb_dec_likelihood_iter = 0;
268 const auto& xdag = bn.dag();
269 for (
const auto node: xdag) {
275 BayesNet< GUM_SCALAR > new_bn = _createBN_< GUM_SCALAR >(EM_estimator, dag,
true);
278 if (log_likelihood_EM_ >= current_log_likelihood) {
280 nb_dec_likelihood_iter = 0;
281 must_return_best_bn =
false;
286 ++nb_dec_likelihood_iter;
287 if (nb_dec_likelihood_iter == 1) {
289 must_return_best_bn =
true;
291 if (nb_dec_likelihood_iter > max_nb_dec_likelihood_iter_) {
298 delta = log_likelihood_EM_ - current_log_likelihood;
299 current_log_likelihood = log_likelihood_EM_;
301 bn = std::move(new_bn);
308 return must_return_best_bn ? best_bn : bn;
void updateApproximationScheme(unsigned int incr=1)
Update the scheme w.r.t the new error and increment steps.
bool isEnabledEpsilon() const override
Returns true if stopping criterion on epsilon is enabled, false otherwise.
bool isEnabledMaxTime() const override
Returns true if stopping criterion on timeout is enabled, false otherwise.
bool isEnabledMinEpsilonRate() const override
Returns true if stopping criterion on epsilon rate is enabled, false otherwise.
void initApproximationScheme()
Initialise the scheme.
void stopApproximationScheme()
Stop the approximation scheme.
bool isEnabledMaxIter() const override
Returns true if stopping criterion on max iterations is enabled, false otherwise.
bool continueApproximationScheme(double error)
Update the scheme w.r.t the new error.
Base class for discrete random variable.
Class for assigning/browsing values to tuples of discrete variables.
bool end() const
Returns true if the Instantiation reached the end.
virtual const Sequence< const DiscreteVariable * > & variablesSequence() const final
Returns a const ref to the sequence of DiscreteVariable*.
virtual Size domainSize() const final
Returns the product of the variables domain size.
virtual void set(const Instantiation &i, const GUM_SCALAR &value) const final
Default implementation of MultiDimContainer::set().
Exception : operation not allowed.
Container used to map discrete variables with nodes.
const DiscreteVariable & get(NodeId id) const
Returns a discrete variable given its node id.
DAG2BNLearner()
default constructor
static BayesNet< GUM_SCALAR > createBN(ParamEstimator &estimator, const DAG &dag)
create a BN from a DAG using a one pass generator (typically ML)
BayesNet< GUM_SCALAR > createBNwithEM(ParamEstimator &bootstrap_estimator, ParamEstimator &EM_estimator, const DAG &dag)
creates a BN with a given structure (dag) using the EM algorithm
bool hasMissingValues() const
indicates whether the database contains some missing values
The base class for estimating parameters of CPTs.
RecordCounter counter_
the record counter used to parse the database
const Bijection< NodeId, std::size_t > & nodeId2Columns() const
returns the mapping from ids to column positions in the database
double setParameters(const NodeId target_node, const std::vector< NodeId > &conditioning_nodes, Tensor< GUM_SCALAR > &pot, const bool compute_log_likelihood=false)
sets a CPT's parameters and, possibly, returns its log-likelihood
virtual void clear()
clears all the data structures from memory
void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn)
assign a new Bayes net to all the counter's generators depending on a BN
const DatabaseTable & database() const
returns the database on which we perform the counts
void clear()
clears all the last database-parsed counting from memory
const std::vector< double > & counts(const IdCondSet &ids, const bool check_discrete_vars=false)
returns the counts over all the variables in an IdCondSet
#define GUM_ERROR(type, msg)
include the inlined functions if necessary
gum is the global namespace for all aGrUM entities