aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
DAG2BNLearner_tpl.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40#pragma once
41
48
49namespace gum {
50
51 namespace learning {
52
54 template < typename GUM_SCALAR >
55 void DAG2BNLearner::_probaVarReordering_(gum::Tensor< GUM_SCALAR >& pot,
56 const gum::Tensor< GUM_SCALAR >& other_pot) {
57 // check that the variables are identical
58 if (!pot.variablesSequence().diffSet(other_pot.variablesSequence()).empty()) {
59 GUM_ERROR(gum::CPTError, "the tensors do not have the same variables")
60 }
61
62 // perform the copy
63 Instantiation i(other_pot);
64 Instantiation j(pot);
65 for (i.setFirst(); !i.end(); ++i) {
66 j.setVals(i);
67 pot.set(j, other_pot[i]);
68 }
69 }
70
72 template < typename GUM_SCALAR >
73 INLINE BayesNet< GUM_SCALAR > DAG2BNLearner::createBN(ParamEstimator& estimator,
74 const DAG& dag) {
75 return DAG2BNLearner()._createBN_(estimator, dag, false);
76 }
77
79 template < typename GUM_SCALAR >
80 BayesNet< GUM_SCALAR > DAG2BNLearner::_createBN_(ParamEstimator& estimator,
81 const DAG& dag,
82 const bool compute_log_likelihood) {
83 BayesNet< GUM_SCALAR > bn;
84 log_likelihood_EM_ = 0.0;
85
86 // create a bn with dummy parameters corresponding to the dag
87 const auto& node2cols = estimator.nodeId2Columns();
88 const auto& database = estimator.database();
89 if (node2cols.empty()) {
90 for (const auto id: dag) {
91 bn.add(dynamic_cast< const DiscreteVariable& >(database.variable(id)), id);
92 }
93 } else {
94 for (const auto id: dag) {
95 const std::size_t col = node2cols.second(id);
96 bn.add(dynamic_cast< const DiscreteVariable& >(database.variable(col)), id);
97 }
98 }
99
100 // add the arcs
101 bn.beginTopologyTransformation();
102 for (const auto& arc: dag.arcs()) {
103 bn.addArc(arc.tail(), arc.head());
104 }
105 bn.endTopologyTransformation();
106
107 // estimate the parameters
108 const VariableNodeMap& varmap = bn.variableNodeMap();
109 for (const auto id: dag) {
110 // get the CPT of node id and its variables in the correct order
111 auto& pot = const_cast< Tensor< GUM_SCALAR >& >(bn.cpt(id));
112 const auto& vars = pot.variablesSequence();
113
114 // get the conditioning variables: they are all the variables except
115 // the last one in pot
116 std::vector< NodeId > conditioning_ids(vars.size() - 1);
117 for (auto i = std::size_t(1); i < vars.size(); ++i) {
118 conditioning_ids[i - 1] = varmap.get(*(vars[i]));
119 }
120
121 log_likelihood_EM_
122 += estimator.setParameters(id, conditioning_ids, pot, compute_log_likelihood);
123 }
124
125 return bn;
126 }
127
129 template < typename GUM_SCALAR >
130 INLINE BayesNet< GUM_SCALAR > DAG2BNLearner::createBNwithEM(ParamEstimator& bootstrap_estimator,
131 ParamEstimator& EM_estimator,
132 const DAG& dag) {
133 // for EM estimations, we need to disable caches
134 bootstrap_estimator.clear();
135 EM_estimator.clear();
136
137 // bootstrap EM by learning an initial model
138 BayesNet< GUM_SCALAR > bn = createBN< GUM_SCALAR >(bootstrap_estimator, dag);
139
140 return _performEM_(bootstrap_estimator, EM_estimator, std::move(bn));
141 }
142
144 template < typename GUM_SCALAR >
145 INLINE BayesNet< GUM_SCALAR > DAG2BNLearner::createBNwithEM(ParamEstimator& bootstrap_estimator,
146 ParamEstimator& EM_estimator,
147 const BayesNet< GUM_SCALAR >& bn) {
148 // for EM estimations, we need to disable caches
149 bootstrap_estimator.clear();
150 EM_estimator.clear();
151
152 auto bn_copy(bn);
153 return createBNwithEM(bootstrap_estimator, EM_estimator, std::move(bn_copy));
154 }
155
157 template < typename GUM_SCALAR >
158 BayesNet< GUM_SCALAR > DAG2BNLearner::createBNwithEM(ParamEstimator& bootstrap_estimator,
159 ParamEstimator& EM_estimator,
160 BayesNet< GUM_SCALAR >&& bn) {
161 // estimate the parameters of the fully zeroed CPTs using the bootstrap estimator
162 const VariableNodeMap& varmap = bn.variableNodeMap();
163 for (const auto id: bn.dag()) {
164 // get the CPT of node id and its variables in the correct order
165 auto& pot = const_cast< Tensor< GUM_SCALAR >& >(bn.cpt(id));
166
167 // check if the CPT contains only zeroes
168 bool all_zeroed = true;
169 for (gum::Instantiation inst(pot); !inst.end(); inst.inc()) {
170 if (pot[inst] != 0.0) {
171 all_zeroed = false;
172 break;
173 }
174 }
175
176 // estimate the initial parameters of pot if all_zeroed
177 if (all_zeroed) {
178 // get the conditioning variables: they are all the variables except
179 // the first one in pot
180 const auto& vars = pot.variablesSequence();
181 std::vector< NodeId > conditioning_ids(vars.size() - 1);
182 for (auto i = std::size_t(1); i < vars.size(); ++i) {
183 conditioning_ids[i - 1] = varmap.get(*(vars[i]));
184 }
185
186 // estimate the initial parameters of pot
187 bootstrap_estimator.setParameters(id, conditioning_ids, pot, false);
188 }
189 }
190
191 return _performEM_(bootstrap_estimator, EM_estimator, std::move(bn));
192 }
193
195 template < typename GUM_SCALAR >
196 BayesNet< GUM_SCALAR > DAG2BNLearner::_performEM_(ParamEstimator& bootstrap_estimator,
197 ParamEstimator& EM_estimator,
198 BayesNet< GUM_SCALAR >&& bn) {
199 // if there exist no missing value, there is no need to apply EM
200 if (!EM_estimator.database().hasMissingValues()) {
201 // here we start/stop the approx scheme to be able to display the number
202 // of EM iterations
205
206 auto bn_copy(bn);
207 return bn_copy;
208 }
209
210 if (!this->isEnabledMinEpsilonRate() && !this->isEnabledEpsilon() && !this->isEnabledMaxIter()
211 && !this->isEnabledMaxTime()) {
213 "EM cannot be executed because no stopping criterion among "
214 << "{min rate, min diff, max iter, max time} has been selected")
215 }
216
217 // as bn will be modified, be sure that the DAG is kept unchanged
218 const DAG dag = bn.dag();
219
220 // perturb the CPTs to initialize EM
221 if (noiseEM_ != 0.0) {
222 for (const auto& node: bn.nodes()) {
223 bn.cpt(node).noising(noiseEM_).normalizeAsCPT();
224 }
225 }
226
227 // perform EM
228 EM_estimator.setBayesNet(bn);
230
231 // compute the initial value of the log-likelihood
232 log_likelihood_EM_ = 0.0;
233 const VariableNodeMap& varmap = bn.variableNodeMap();
234 EM_estimator.counter_.clear(); // for EM estimations, we need to disable caches
235 for (const auto& node: bn.nodes()) {
236 // get node's CPT and its conditioning variables: they are all the
237 // variables except the first one in pot
238 const auto& pot = const_cast< Tensor< GUM_SCALAR >& >(bn.cpt(node));
239 const auto& vars = pot.variablesSequence();
240 std::vector< NodeId > conditioning_ids(vars.size() - 1);
241 for (auto i = std::size_t(1); i < vars.size(); ++i) {
242 conditioning_ids[i - 1] = varmap.get(*(vars[i]));
243 }
244
245 // compute the log-likelihood
246 IdCondSet idset(node, conditioning_ids, true);
247 const auto& N_ijk = EM_estimator.counter_.counts(idset, true);
248 Instantiation inst(pot);
249 for (std::size_t k = 0, end = pot.domainSize(); k < end; ++k, inst.inc()) {
250 if (N_ijk[k]) { log_likelihood_EM_ += N_ijk[k] * std::log(pot[inst]); }
251 }
252 }
253 double current_log_likelihood = log_likelihood_EM_;
254
255 // it may happen (luckily very seldom) that EM will decrease the
256 // log-likelihood instead of increasing it (see Table 5 on p28 of
257 // https://faculty.washington.edu/fxia/courses/LING572/EM_collins97.pdf
258 // for an example of such a behavior). In this case, instead of iterating
259 // EM and producing worst and worst Bayes nets, we stop the iterations
260 // early and we return the best Bayes net found so far.
261 BayesNet< GUM_SCALAR > best_bn;
262 bool must_return_best_bn = false;
263 unsigned int nb_dec_likelihood_iter = 0;
264 double delta = 0;
265
266 do {
267 // bugfix for parallel execution of VariableElimination
268 const auto& xdag = bn.dag();
269 for (const auto node: xdag) {
270 xdag.parents(node);
271 xdag.children(node);
272 }
273
274 EM_estimator.counter_.clear(); // for EM estimations, we need to disable caches
275 BayesNet< GUM_SCALAR > new_bn = _createBN_< GUM_SCALAR >(EM_estimator, dag, true);
277
278 if (log_likelihood_EM_ >= current_log_likelihood) {
279 // here, we increased the log-likelihood, it is fine
280 nb_dec_likelihood_iter = 0;
281 must_return_best_bn = false;
282 } else {
283 // here, we decreased the log-likelihood, so we should keep track of the
284 // best Bayes net found so far. If we decreased too many times the
285 // log-likelihood, we should even stop EM
286 ++nb_dec_likelihood_iter;
287 if (nb_dec_likelihood_iter == 1) {
288 best_bn = bn; // bn is the Bayes net computed at the previous step
289 must_return_best_bn = true;
290 }
291 if (nb_dec_likelihood_iter > max_nb_dec_likelihood_iter_) {
293 return best_bn;
294 }
295 }
296
297 // compute the difference in log-likelihood
298 delta = log_likelihood_EM_ - current_log_likelihood;
299 current_log_likelihood = log_likelihood_EM_;
300
301 bn = std::move(new_bn);
302 } while (continueApproximationScheme(this->isEnabledMinEpsilonRate() ? -log_likelihood_EM_
303 : delta));
304
305 stopApproximationScheme(); // just to be sure of the approximationScheme
306 // has been notified of the end of loop
307
308 return must_return_best_bn ? best_bn : bn;
309 }
310
311 } // namespace learning
312
313} /* namespace gum */
void updateApproximationScheme(unsigned int incr=1)
Update the scheme w.r.t the new error and increment steps.
bool isEnabledEpsilon() const override
Returns true if stopping criterion on epsilon is enabled, false otherwise.
bool isEnabledMaxTime() const override
Returns true if stopping criterion on timeout is enabled, false otherwise.
bool isEnabledMinEpsilonRate() const override
Returns true if stopping criterion on epsilon rate is enabled, false otherwise.
void initApproximationScheme()
Initialise the scheme.
void stopApproximationScheme()
Stop the approximation scheme.
bool isEnabledMaxIter() const override
Returns true if stopping criterion on max iterations is enabled, false otherwise.
bool continueApproximationScheme(double error)
Update the scheme w.r.t the new error.
Base class for dag.
Definition DAG.h:121
Base class for discrete random variable.
Class for assigning/browsing values to tuples of discrete variables.
bool end() const
Returns true if the Instantiation reached the end.
virtual const Sequence< const DiscreteVariable * > & variablesSequence() const final
Returns a const ref to the sequence of DiscreteVariable*.
virtual Size domainSize() const final
Returns the product of the variables domain size.
virtual void set(const Instantiation &i, const GUM_SCALAR &value) const final
Default implementation of MultiDimContainer::set().
Exception : operation not allowed.
Container used to map discrete variables with nodes.
const DiscreteVariable & get(NodeId id) const
Returns a discrete variable given it's node id.
DAG2BNLearner()
default constructor
static BayesNet< GUM_SCALAR > createBN(ParamEstimator &estimator, const DAG &dag)
create a BN from a DAG using a one pass generator (typically ML)
BayesNet< GUM_SCALAR > createBNwithEM(ParamEstimator &bootstrap_estimator, ParamEstimator &EM_estimator, const DAG &dag)
creates a BN with a given structure (dag) using the EM algorithm
bool hasMissingValues() const
indicates whether the database contains some missing values
The base class for estimating parameters of CPTs.
RecordCounter counter_
the record counter used to parse the database
const Bijection< NodeId, std::size_t > & nodeId2Columns() const
returns the mapping from ids to column positions in the database
double setParameters(const NodeId target_node, const std::vector< NodeId > &conditioning_nodes, Tensor< GUM_SCALAR > &pot, const bool compute_log_likelihood=false)
sets a CPT's parameters and, possibly, returns its log-likelihood
virtual void clear()
clears all the data structures from memory
void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn)
assign a new Bayes net to all the counter's generators depending on a BN
const DatabaseTable & database() const
returns the database on which we perform the counts
void clear()
clears all the last database-parsed counting from memory
const std::vector< double > & counts(const IdCondSet &ids, const bool check_discrete_vars=false)
returns the counts over all the variables in an IdCondSet
#define GUM_ERROR(type, msg)
Definition exceptions.h:72
include the inlined functions if necessary
Definition CSVParser.h:54
gum is the global namespace for all aGrUM entities
Definition agrum.h:46