aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
barrenNodesFinder.cpp
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
46#include <limits>
47
49
50#ifdef GUM_NO_INLINE
52#endif // GUM_NO_INLINE
53
54namespace gum {
55
58 // assign a mark to all the nodes
59 // and mark all the observed nodes and their ancestors as non-barren
60 NodeProperty< Size > mark(_dag_->size());
61 {
62 for (const auto node: *_dag_)
63 mark.insert(node, 0); // for the moment, 0 = possibly barren
64
65 // mark all the observed nodes and their ancestors as non barren
66 // std::numeric_limits<unsigned int>::max () will be necessarily non
67 // barren
68 // later on
69 Sequence< NodeId > observed_anc(_dag_->size());
70 const Size non_barren = std::numeric_limits< Size >::max();
71 for (const auto node: *_observed_nodes_)
72 observed_anc.insert(node);
73 for (Idx i = 0; i < observed_anc.size(); ++i) {
74 const NodeId node = observed_anc[i];
75 if (!mark[node]) {
76 mark[node] = non_barren;
77 for (const auto par: _dag_->parents(node)) {
78 if (!mark[par] && !observed_anc.exists(par)) { observed_anc.insert(par); }
79 }
80 }
81 }
82 }
83
84 // create the data structure that will contain the result of the
85 // method. By default, we assume that, for each pair of adjacent cliques,
86 // all
87 // the nodes that do not belong to their separator are possibly barren and,
88 // by sweeping the dag, we will remove the nodes that were determined
89 // above as non-barren. Structure result will assign to each (ordered) pair
90 // of adjacent cliques its set of barren nodes.
92 for (const auto& edge: junction_tree.edges()) {
93 const NodeSet& separator = junction_tree.separator(edge);
94
95 NodeSet non_barren1 = junction_tree.clique(edge.first());
96 for (auto iter = non_barren1.beginSafe(); iter != non_barren1.endSafe(); ++iter) {
97 if (mark[*iter] || separator.exists(*iter)) { non_barren1.erase(iter); }
98 }
99 result.insert(Arc(edge.first(), edge.second()), std::move(non_barren1));
100
101 NodeSet non_barren2 = junction_tree.clique(edge.second());
102 for (auto iter = non_barren2.beginSafe(); iter != non_barren2.endSafe(); ++iter) {
103 if (mark[*iter] || separator.exists(*iter)) { non_barren2.erase(iter); }
104 }
105 result.insert(Arc(edge.second(), edge.first()), std::move(non_barren2));
106 }
107
108 // for each node in the DAG, indicate which are the arcs in the result
109 // structure whose separator contain it: the separators are actually the
110 // targets of the queries.
111 NodeProperty< ArcSet > node2arc;
112 for (const auto node: *_dag_)
113 node2arc.insert(node, ArcSet());
114 for (const auto& elt: result) {
115 const Arc& arc = elt.first;
116 if (!result[arc].empty()) { // no need to further process cliques
117 const NodeSet& separator = // with no barren nodes
118 junction_tree.separator(Edge(arc.tail(), arc.head()));
119
120 for (const auto node: separator) {
121 node2arc[node].insert(arc);
122 }
123 }
124 }
125
126 // To determine the set of non-barren nodes w.r.t. a given single node
127 // query, we rely on the fact that those are precisely all the ancestors of
128 // this single node. To mutualize the computations, we will thus sweep the
129 // DAG from top to bottom and exploit the fact that the set of ancestors of
130 // the child of a given node A contain the ancestors of A. Therefore, we
131 // will
132 // determine sets of paths in the DAG and, for each path, compute the set of
133 // its barren nodes from the source to the destination of the path. The
134 // optimal set of paths, i.e., that which will minimize computations, is
135 // obtained by solving a "minimum path cover in directed acyclic graphs".
136 // But
137 // such an algorithm is too costly for the gain we can get from it, so we
138 // will
139 // rely on a simple heuristics.
140
141 // To compute the heuristics, we proceed as follows:
142 // 1/ we mark to 1 all the nodes that are ancestors of at least one (key)
143 // node
144 // with a non-empty arcset in node2arc and we extract from those the
145 // roots, i.e., those nodes whose set of parents, if any, have all been
146 // identified as non-barren by being marked as
147 // std::numeric_limits<unsigned int>::max (). Such nodes are
148 // thus the top of the graph to sweep.
149 // 2/ create a copy of the subgraph of the DAG w.r.t. the 1-marked nodes
150 // and, for each node, if the node has several parents and children,
151 // keep only one arc from one of the parents to the child with the
152 // smallest
153 // number of parents, and try to create a matching between parents and
154 // children and add one arc for each edge of this matching. This will
155 // allow
156 // us to create distinct paths in the DAG. Whenever a child has no more
157 // parents, it becomes the root of a new path.
158 // 3/ the sweeping will be performed from the roots of all these paths.
159
160 // perform step 1/
161 NodeSet path_roots;
162 {
163 List< NodeId > nodes_to_mark;
164 for (const auto& elt: node2arc) {
165 if (!elt.second.empty()) { // only process nodes with assigned arcs
166 nodes_to_mark.insert(elt.first);
167 }
168 }
169 while (!nodes_to_mark.empty()) {
170 NodeId node = nodes_to_mark.front();
171 nodes_to_mark.popFront();
172
173 if (!mark[node]) { // mark the node and all its ancestors
174 mark[node] = 1;
175 Size nb_par = 0;
176 for (auto par: _dag_->parents(node)) {
177 Size parent_mark = mark[par];
178 if (parent_mark != std::numeric_limits< Size >::max()) {
179 ++nb_par;
180 if (parent_mark == 0) { nodes_to_mark.insert(par); }
181 }
182 }
183
184 if (nb_par == 0) { path_roots.insert(node); }
185 }
186 }
187 }
188
189 // perform step 2/
190 DAG sweep_dag = *_dag_;
191 for (const auto node: *_dag_) { // keep only nodes marked with 1
192 if (mark[node] != 1) { sweep_dag.eraseNode(node); }
193 }
194 for (const auto node: sweep_dag) {
195 const Size nb_parents = sweep_dag.parents(node).size();
196 const Size nb_children = sweep_dag.children(node).size();
197 if ((nb_parents > 1) || (nb_children > 1)) {
198 // perform the matching
199 const auto& parents = sweep_dag.parents(node);
200
201 // if there is no child, remove all the parents except the first one
202 if (nb_children == 0) {
203 auto iter_par = parents.beginSafe();
204 for (++iter_par; iter_par != parents.endSafe(); ++iter_par) {
205 sweep_dag.eraseArc(Arc(*iter_par, node));
206 }
207 } else {
208 // find the child with the smallest number of parents
209 const auto& children = sweep_dag.children(node);
210 NodeId smallest_child = 0;
211 Size smallest_nb_par = std::numeric_limits< Size >::max();
212 for (const auto child: children) {
213 const auto new_nb = sweep_dag.parents(child).size();
214 if (new_nb < smallest_nb_par) {
215 smallest_nb_par = new_nb;
216 smallest_child = child;
217 }
218 }
219
220 // if there is no parent, just keep the link with the smallest child
221 // and remove all the other arcs
222 if (nb_parents == 0) {
223 for (auto iter = children.beginSafe(); iter != children.endSafe(); ++iter) {
224 if (*iter != smallest_child) {
225 if (sweep_dag.parents(*iter).size() == 1) { path_roots.insert(*iter); }
226 sweep_dag.eraseArc(Arc(node, *iter));
227 }
228 }
229 } else {
230 auto nb_match = Size(std::min(nb_parents, nb_children) - 1);
231 auto iter_par = parents.beginSafe();
232 ++iter_par; // skip the first parent, whose arc with node will
233 // remain
234 auto iter_child = children.beginSafe();
235 for (Idx i = 0; i < nb_match; ++i, ++iter_par, ++iter_child) {
236 if (*iter_child == smallest_child) { ++iter_child; }
237 sweep_dag.addArc(*iter_par, *iter_child);
238 sweep_dag.eraseArc(Arc(*iter_par, node));
239 sweep_dag.eraseArc(Arc(node, *iter_child));
240 }
241 for (; iter_par != parents.endSafe(); ++iter_par) {
242 sweep_dag.eraseArc(Arc(*iter_par, node));
243 }
244 for (; iter_child != children.endSafe(); ++iter_child) {
245 if (*iter_child != smallest_child) {
246 if (sweep_dag.parents(*iter_child).size() == 1) { path_roots.insert(*iter_child); }
247 sweep_dag.eraseArc(Arc(node, *iter_child));
248 }
249 }
250 }
251 }
252 }
253 }
254
255 // step 3: sweep the paths from the roots of sweep_dag
256 // here, the idea is that, for each path of sweep_dag, the mark we put
257 // to the ancestors is a given number, say N, that increases from path
258 // to path. Hence, for a given path, all the nodes that are marked with a
259 // number at least as high as N are non-barren, the others being barren.
260 Idx mark_id = 2;
261 for (NodeId path: path_roots) {
262 // perform the sweeping from the path
263 while (true) {
264 // mark all the ancestors of the node
265 List< NodeId > to_mark{path};
266 while (!to_mark.empty()) {
267 NodeId node = to_mark.front();
268 to_mark.popFront();
269 if (mark[node] < mark_id) {
270 mark[node] = mark_id;
271 for (const auto par: _dag_->parents(node)) {
272 if (mark[par] < mark_id) { to_mark.insert(par); }
273 }
274 }
275 }
276
277 // now, get all the arcs that contained node "path" in their separator.
278 // this node acts as a query target and, therefore, its ancestors
279 // shall be non-barren.
280 const ArcSet& arcs = node2arc[path];
281 for (const auto& arc: arcs) {
282 NodeSet& barren = result[arc];
283 for (auto iter = barren.beginSafe(); iter != barren.endSafe(); ++iter) {
284 if (mark[*iter] >= mark_id) {
285 // this indicates a non-barren node
286 barren.erase(iter);
287 }
288 }
289 }
290
291 // go to the next sweeping node
292 const NodeSet& sweep_children = sweep_dag.children(path);
293 if (sweep_children.size()) {
294 path = *(sweep_children.begin());
295 } else {
296 // here, the path has ended, so we shall go to the next path
297 ++mark_id;
298 break;
299 }
300 }
301 }
302
303 return result;
304 }
305
308 // mark all the nodes in the dag as barren (true)
309 NodeProperty< bool > barren_mark = _dag_->nodesPropertyFromVal(true);
310
311 // mark all the ancestors of the evidence and targets as non-barren
312 List< NodeId > nodes_to_examine;
313 int nb_non_barren = 0;
314 for (const auto node: *_observed_nodes_)
315 nodes_to_examine.insert(node);
316 for (const auto node: *_target_nodes_)
317 nodes_to_examine.insert(node);
318
319 while (!nodes_to_examine.empty()) {
320 const NodeId node = nodes_to_examine.front();
321 nodes_to_examine.popFront();
322 if (barren_mark[node]) {
323 barren_mark[node] = false;
324 ++nb_non_barren;
325 for (const auto par: _dag_->parents(node))
326 nodes_to_examine.insert(par);
327 }
328 }
329
330 // here, all the nodes marked true are barren
331 NodeSet barren_nodes(_dag_->sizeNodes() - nb_non_barren);
332 for (const auto& marked_pair: barren_mark)
333 if (marked_pair.second) barren_nodes.insert(marked_pair.first);
334
335 return barren_nodes;
336 }
337
338} /* namespace gum */
Detect barren nodes for inference in Bayesian networks.
const NodeSet & parents(NodeId id) const
returns the set of nodes with arc ingoing to a given node
NodeSet children(const NodeSet &ids) const
returns the set of children of a set of nodes
virtual void eraseArc(const Arc &arc)
removes an arc from the ArcGraphPart
The base class for all directed edges.
GUM_NODISCARD NodeId head() const
returns the head of the arc
GUM_NODISCARD NodeId first() const
returns one extremal node ID (whichever one it is is unspecified)
GUM_NODISCARD NodeId tail() const
returns the tail of the arc
const NodeSet * _observed_nodes_
the set of observed nodes
const DAG * _dag_
the DAG on which we compute the barren nodes
NodeSet barrenNodes()
returns the set of barren nodes
const NodeSet * _target_nodes_
the set of targeted nodes
Basic graph of cliques.
Definition cliqueGraph.h:77
const NodeSet & separator(const Edge &edge) const
returns the separator included in a given edge
const NodeSet & clique(const NodeId idClique) const
returns the set of nodes included into a given clique
Base class for dag.
Definition DAG.h:121
void addArc(NodeId tail, NodeId head) final
insert a new arc into the directed graph
Definition DAG_inl.h:63
virtual void eraseNode(const NodeId id)
remove a node and its adjacent arcs from the graph
Definition diGraph_inl.h:79
const EdgeSet & edges() const
returns the set of edges stored within the EdgeGraphPart
The base class for all undirected edges.
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
Generic doubly linked lists.
Definition list.h:379
Val & front() const
Returns a reference to first element of a list, if any.
Definition list_tpl.h:1703
bool empty() const noexcept
Returns a boolean indicating whether the chained list is empty.
Definition list_tpl.h:1831
void popFront()
Removes the first element of a List, if any.
Definition list_tpl.h:1825
Val & insert(const Val &val)
Inserts a new element at the end of the chained list (alias of pushBack).
Definition list_tpl.h:1515
iterator begin() const
The usual unsafe begin iterator to parse the set.
Definition set_tpl.h:438
Size size() const noexcept
Returns the number of elements in the set.
Definition set_tpl.h:636
iterator_safe beginSafe() const
The usual safe begin iterator to parse the set.
Definition set_tpl.h:414
const iterator_safe & endSafe() const noexcept
The usual safe end iterator to parse the set.
Definition set_tpl.h:426
bool exists(const Key &k) const
Indicates whether a given elements belong to the set.
Definition set_tpl.h:533
void insert(const Key &k)
Inserts a new element into the set.
Definition set_tpl.h:539
void erase(const Key &k)
Erases an element from the set.
Definition set_tpl.h:582
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition types.h:74
Size Idx
Type for indexes.
Definition types.h:79
Size NodeId
Type for node ids.
HashTable< Arc, VAL > ArcProperty
Property on graph elements.
Set< Arc > ArcSet
Some typdefs and define for shortcuts ...
HashTable< NodeId, VAL > NodeProperty
Property on graph elements.
Set< NodeId > NodeSet
Some typdefs and define for shortcuts ...
gum is the global namespace for all aGrUM entities
Definition agrum.h:46