aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
gspan_tpl.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40#pragma once
41
42
51
52namespace gum {
53 namespace prm {
54
55 template < typename GUM_SCALAR >
57 Timer t;
60
61 for (auto root = _tree_.roots().begin(); root != _tree_.roots().end(); ++root) {
62 if (_tree_.strategy().accept_root(&(_tree_.pattern(*root)))) {
63 gspan::Pattern& p = _tree_.pattern(*root);
64 _subgraph_mining_(graph, p);
65
66 for (const auto node: _tree_.iso_graph(p).nodes()) {
67 PRMInstance< GUM_SCALAR >* u = _tree_.iso_map(p, node).atPos(0);
68 PRMInstance< GUM_SCALAR >* v = _tree_.iso_map(p, node).atPos(1);
69 graph.graph().eraseEdge(Edge(graph.id(u), graph.id(v)));
70 }
71 }
72 }
73
75 }
76
77 template < typename GUM_SCALAR >
79 for (auto iter = _graph_->labels().begin(); iter != _graph_->labels().end(); ++iter) {
80 try {
81 if (_graph_->nodes(iter.second()).size() >= 2) {
82 _cost_.insert(
83 iter.second(),
84 _cost_func_(iter.second()->tree_width, _graph_->nodes(iter.second()).size()));
85 _nodes_.push_back(const_cast< gspan::LabelData* >(iter.second()));
86 }
87 } catch (NotFound const&) {
88 // It's a label over edges
89 if (_isEdgeEligible_(*(_graph_->edges(iter.second()).begin()))) {
90 _cost_.insert(
91 iter.second(),
92 _cost_func_(iter.second()->tree_width, _graph_->edges(iter.second()).size()));
93 _edges_.push_back(iter.second());
94 }
95 }
96 }
97
100 std::sort(_nodes_.begin(), _nodes_.end(), my_sort);
101 std::sort(_edges_.begin(), _edges_.end(), my_sort);
102 Size idx = 0;
103
104 for (auto iter = _nodes_.begin(); iter != _nodes_.end(); ++iter) {
105 (*iter)->id = ++idx;
106 new_labels->insert(idx, *iter);
107 }
108
109 for (auto iter = _edges_.begin(); iter != _edges_.end(); ++iter) {
110 (*iter)->id = ++idx;
111 new_labels->insert(idx, *iter);
112 _tree_.addRoot(**iter);
113 }
114
115 delete _graph_->_labels_;
116 _graph_->_labels_ = new_labels;
117 }
118
119 template < typename GUM_SCALAR >
121 gspan::Pattern& pat) {
122 std::vector< gspan::Pattern* > stack;
123 stack.push_back(&pat);
124 // Pointers used in the following while
125 gspan::Pattern* p = nullptr;
127 gspan::EdgeGrowth< GUM_SCALAR >* edge_growth = nullptr;
128 Sequence< PRMInstance< GUM_SCALAR >* >* seq = nullptr;
129 PRMInstance< GUM_SCALAR >* current = nullptr;
130 PRMInstance< GUM_SCALAR >* neighbor = nullptr;
131
132 // Neighbor_id is the neighbor's id in the interface graph and
133 // neighbor_node
134 // is its id in the rightmost path in the case of a backward edge growth
135 NodeId current_id = 0;
136 NodeId neighbor_node = 0;
137 gspan::LabelData* neighbor_label = 0;
138
139 typename gspan::EdgeData< GUM_SCALAR >* edge_data = nullptr;
140
141 size_t idx;
142 const std::list< NodeId >* children = 0;
143
144 while (!stack.empty()) {
145 // Getting next pattern
146 p = stack.back();
147 stack.pop_back();
148
149 if (p->code().codes.size() < _depth_stop_) {
150 // We need the rightmost path of p
151 std::list< NodeId > r_path;
152 p->rightmostPath(r_path);
153 // Mapping used to count each possible child of p, the position in the
154 // vector
155 // matches the one in the rightmost path
156 std::vector< HashTable< std::string, gspan::EdgeGrowth< GUM_SCALAR >* >* > count_vector;
157
158 for (size_t i = 0; i < r_path.size(); ++i)
159 count_vector.push_back(
160 new HashTable< std::string, gspan::EdgeGrowth< GUM_SCALAR >* >());
161
162 // For each subgraph represented by p, we look for a valid edge growth
163 // for
164 // each instance match of p in its isomorphism graph.
165 for (const auto iso_node: _tree_.iso_graph(*p).nodes()) {
166 seq = &(_tree_.iso_map(*p, iso_node));
167 idx = 0;
168
169 for (const auto node: r_path) {
170 edge_count = count_vector[idx];
171 // Retrieving the equivalent instance in the current match
172 current = seq->atPos((Idx)(node - 1));
173 current_id = ig.id(current);
174 // Checking for edges not in p
175
176 for (const auto neighbor_id: ig.graph().neighbours(current_id)) {
177 neighbor = ig.node(neighbor_id).n;
178
179 // We want a forward edge in any case or a backward edge if
180 // current
181 // is the rightmost vertex
182 if ((!seq->exists(neighbor)) || (node == r_path.back())) {
183 // Things we need to know: the LabelData data of the neighbour
184 // and,
185 // if it's a backward edge, its node id in the rightmost path
186 edge_data = &(ig.edge(current_id, neighbor_id));
187 neighbor_label = (neighbor == edge_data->u) ? edge_data->l_u : edge_data->l_v;
188 neighbor_node = (seq->exists(neighbor)) ? seq->pos(neighbor) + 1 : 0;
189 // Adding the edge growth to the edge_growth hashtable
190 gspan::EdgeGrowth< GUM_SCALAR > temp_growth(node,
191 edge_data->l,
192 neighbor_label,
193 neighbor_node);
194
195 try {
196 edge_growth = (*edge_count)[temp_growth.toString()];
197 edge_growth->insert(current, neighbor);
198 } catch (NotFound const&) {
199 edge_growth = new gspan::EdgeGrowth< GUM_SCALAR >(node,
200 edge_data->l,
201 neighbor_label,
202 neighbor_node);
203 edge_growth->insert(current, neighbor);
204 edge_count->insert(edge_growth->toString(), edge_growth);
205 }
206 }
207 }
208 }
209 }
210
211 // Removing any infrequent child
212 for (size_t node = 0; node < count_vector.size(); ++node) {
213 edge_count = count_vector[node];
214
215 for (const auto& elt: *edge_count) {
216 try {
217 _tree_.growPattern(*p, *elt.second, 2);
218 } catch (OperationNotAllowed const&) {
219 // The child was not minimal or was not worth considering
220 }
221
222 delete elt.second;
223 }
224
225 delete edge_count;
226 }
227
228 // Calling _subgraph_mining_ over children of p
229 children = &(_tree_.children(*p));
230
231 for (std::list< NodeId >::const_reverse_iterator child = children->rbegin();
232 child != children->rend();
233 ++child)
234 stack.push_back(&(_tree_.pattern(*child)));
235 }
236 }
237 }
238
239 template < typename GUM_SCALAR >
241 // First we put all the patterns in _patterns_.
242 std::vector< NodeId > stack;
243
244 for (std::list< NodeId >::reverse_iterator root = tree().roots().rbegin();
245 root != tree().roots().rend();
246 ++root)
247 stack.push_back(*root);
248
249 NodeId id = 0;
250 std::list< NodeId >* children = nullptr;
251
252 while (!stack.empty()) {
253 id = stack.back();
254 stack.pop_back();
255 _patterns_.push_back(&(tree().pattern(id)));
256 children = &(tree().children(tree().pattern(id)));
257
258 for (std::list< NodeId >::reverse_iterator child = children->rbegin();
259 child != children->rend();
260 ++child)
261 stack.push_back(*child);
262 }
263
264 if (!_patterns_.empty()) {
265 // We sort _patterns_.
267 std::sort(_patterns_.begin(), _patterns_.end(), my_sort);
268 // Now we need to find all the matches we can, using _patterns_.
269 // We start by the best Pattern and add it's maximal independent set to
270 // _chosen_
273 Sequence< PRMInstance< GUM_SCALAR >* >* match = nullptr;
274
275 for (const auto node: tree().max_indep_set(*(_patterns_.front()))) {
276 match = &(tree().iso_map(*(_patterns_.front()), node));
277
278 for (const auto i: *match)
279 _chosen_.insert(i);
280
281 matches->insert(match);
282 }
283
284 _matched_instances_.insert(_patterns_.front(), matches);
285 // Now we see what kind of pattern we can still use
286 bool found;
287 UndiGraph* iso_graph = nullptr;
288
289 for (auto patt = _patterns_.begin() + 1; patt != _patterns_.end(); ++patt) {
290 UndiGraph reduced_iso_graph;
291 std::vector< NodeId > degree_list;
292 iso_graph = &(tree().iso_graph(**patt));
293
294 for (const auto node: iso_graph->nodes()) {
295 found = false;
296 match = &(tree().iso_map(**patt, node));
297
298 for (const auto i: *match)
299 if (_chosen_.exists(i)) {
300 found = true;
301 break;
302 }
303
304 if (!found) {
305 // We add the pattern to the reduced isomorphism graph to compute
306 // the
307 // max independent set
308 // over the remaining matches
309 reduced_iso_graph.addNodeWithId(node);
310
311 for (const auto iso: reduced_iso_graph.nodes())
312 if (iso_graph->existsEdge(node, iso)) reduced_iso_graph.addEdge(node, iso);
313
314 degree_list.push_back(node);
315 }
316 }
317
318 // We create a new set to hold all the chosen matches of patt
320 // We can compute the max independent set and the matches belonging to
321 // it
322 typename gspan::DFSTree< GUM_SCALAR >::NeighborDegreeSort my_sort(reduced_iso_graph);
323 std::sort(degree_list.begin(), degree_list.end(), my_sort);
324 Set< NodeId > removed;
325
326 for (const auto node: degree_list)
327 if (!removed.exists(node)) {
328 // First we update removed to follow the max independent set
329 // algorithm
330 removed.insert(node);
331
332 for (const auto neighbor: reduced_iso_graph.neighbours(node))
333 removed.insert(neighbor);
334
335 // Second we update match and matches to keep track of the current
336 // match
337 match = &(tree().iso_map(**patt, node));
338 matches->insert(match);
339
340 for (const auto elt: *match)
341 _chosen_.insert(elt);
342 }
343
344 _matched_instances_.insert(*patt, matches);
345 }
346
347 // // We remove patterns with 0 matches
348 std::vector< size_t > trash;
349
350 for (size_t idx = 0; idx < _patterns_.size(); ++idx)
351 if (_matched_instances_[_patterns_[idx]]->size() < 2) trash.push_back(idx);
352
353 while (trash.size()) {
354 delete _matched_instances_[_patterns_[trash.back()]];
355 _matched_instances_.erase(_patterns_[trash.back()]);
356 // delete _patterns_[trash.back()];
357 _patterns_[trash.back()] = _patterns_.back();
358 _patterns_.pop_back();
359 trash.pop_back();
360 }
361 }
362 }
363
364 template < typename GUM_SCALAR >
366 const PRMSystem< GUM_SCALAR >& sys,
368 _graph_(new gspan::InterfaceGraph< GUM_SCALAR >(sys)), _tree_(*_graph_, strategy),
369 _depth_stop_(INT_MAX) {
370 GUM_CONSTRUCTOR(GSpan);
371 }
372
373 template < typename GUM_SCALAR >
375 GUM_DESTRUCTOR(GSpan);
376
377 for (const auto& elt: _matched_instances_)
378 delete elt.second;
379
380 delete _graph_;
381 }
382
383 template < typename GUM_SCALAR >
385 return _depth_stop_;
386 }
387
388 template < typename GUM_SCALAR >
390 _depth_stop_ = depth;
391 }
392
393 template < typename GUM_SCALAR >
397
398 template < typename GUM_SCALAR >
400 return _tree_;
401 }
402
403 template < typename GUM_SCALAR >
404 INLINE Idx GSpan< GUM_SCALAR >::_cost_func_(Size interface_size, Size frequency) {
405 return Idx(interface_size * frequency);
406 }
407
408 template < typename GUM_SCALAR >
409 INLINE std::vector< gspan::Pattern* >& GSpan< GUM_SCALAR >::patterns() {
410 return _patterns_;
411 }
412
413 template < typename GUM_SCALAR >
414 INLINE const std::vector< gspan::Pattern* >& GSpan< GUM_SCALAR >::patterns() const {
415 return _patterns_;
416 }
417
418 template < typename GUM_SCALAR >
421 return *(_matched_instances_[const_cast< gspan::Pattern* >(&p)]);
422 }
423
424 template < typename GUM_SCALAR >
425 INLINE const typename GSpan< GUM_SCALAR >::MatchedInstances&
427 return *(_matched_instances_[const_cast< gspan::Pattern* >(&p)]);
428 }
429
430 template < typename GUM_SCALAR >
434
435 template < typename GUM_SCALAR >
439
440 template < typename GUM_SCALAR >
442 return (_graph_->edges(e->l).size() >= 2) && (_graph_->nodes(e->l_u).size() >= 2)
443 && (_graph_->nodes(e->l_v).size() >= 2);
444 }
445
446 // LalbeSort
447
448 template < typename GUM_SCALAR >
450 GUM_CONSTRUCTOR(GSpan< GUM_SCALAR >::LabelSort);
451 }
452
453 template < typename GUM_SCALAR >
455 gspan(source.gspan) {
456 GUM_CONS_CPY(GSpan< GUM_SCALAR >::LabelSort);
457 }
458
459 template < typename GUM_SCALAR >
463
464 template < typename GUM_SCALAR >
466 gspan::LabelData* j) {
467 // We want a descending order
468 // return gspan-> _cost_[i] > gspan-> _cost_[j];
469 return gspan->_tree_.strategy()(i, j);
470 }
471
472 // PatternSort
473
474 template < typename GUM_SCALAR >
476 GUM_CONSTRUCTOR(GSpan< GUM_SCALAR >::PatternSort);
477 }
478
479 template < typename GUM_SCALAR >
481 gspan(source.gspan) {
483 }
484
485 template < typename GUM_SCALAR >
489
490 template < typename GUM_SCALAR >
492 // We want a descending order
493 return gspan->tree().strategy().operator()(i, j);
494 }
495
496 } /* namespace prm */
497} /* namespace gum */
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
Set of pairs of elements with fast search for both elements.
Definition bijection.h:1594
virtual void eraseEdge(const Edge &edge)
removes an edge from the EdgeGraphPart
bool existsEdge(const Edge &edge) const
indicates whether a given edge exists
const NodeSet & neighbours(NodeId id) const
returns the set of node neighbours to a given node
The base class for all undirected edges.
The class for generic Hash Tables.
Definition hashTable.h:637
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
const NodeGraphPart & nodes() const
return *this as a NodeGraphPart
virtual void addNodeWithId(const NodeId id)
try to insert a node with the given id
Exception : the element we looked for cannot be found.
Exception : operation not allowed.
bool exists(const Key &k) const
Check the existence of k in the sequence.
const Key & atPos(Idx i) const
Returns the object at the pos i.
Idx pos(const Key &key) const
Returns the position of the object passed in argument (if it exists).
The generic class for storing (ordered) sequences of objects.
Definition sequence.h:972
bool exists(const Key &k) const
Indicates whether a given elements belong to the set.
Definition set_tpl.h:533
void insert(const Key &k)
Inserts a new element into the set.
Definition set_tpl.h:539
Class used to compute response times for benchmark purposes.
Definition timer.h:69
Base class for undirected graphs.
Definition undiGraph.h:128
void addEdge(NodeId first, NodeId second) override
insert a new edge into the undirected graph
void discoverPatterns()
This will methods will discover repeated patterns in the PRMSystem<GUM_SCALAR> assigned to this class...
Definition gspan_tpl.h:56
HashTable< gspan::Pattern *, MatchedInstances * > _matched_instances_
Mapping between a pattern and the multiset of instances matched to it.
Definition gspan.h:254
std::vector< gspan::LabelData * > _nodes_
The vector of nodes in graph, in decreasing order of interest.
Definition gspan.h:244
std::vector< gspan::Pattern * > _patterns_
The vector of discovered patters, in decreasing order of interest.
Definition gspan.h:241
~GSpan()
Destructor.
Definition gspan_tpl.h:374
HashTable< gspan::LabelData *, Idx > _cost_
Mapping between labels and their cost.
Definition gspan.h:250
MatchedInstances & matches(const gspan::Pattern &p)
Returns a mapping between patterns and the sequence of instance in the interface graph matching them.
Definition gspan_tpl.h:420
std::vector< gspan::Pattern * > & patterns()
Returns the Pattern mined by this class in a decreasing order of interest.
Definition gspan_tpl.h:409
gspan::InterfaceGraph< GUM_SCALAR > * _graph_
The interface graph used by this class.
Definition gspan.h:232
void setMaxDFSDepth(Size depth)
Defines the maximal depth of the DFSTree used by this class to discover new patterns.
Definition gspan_tpl.h:389
gspan::DFSTree< GUM_SCALAR > _tree_
The DFSTree used to discover new patters.
Definition gspan.h:235
Set< PRMInstance< GUM_SCALAR > * > _chosen_
Contains all instance which belongs to a discovered and used pattern.
Definition gspan.h:257
void _subgraph_mining_(gspan::InterfaceGraph< GUM_SCALAR > &graph, gspan::Pattern &p)
Discovers new patterns by developing p.
Definition gspan_tpl.h:120
std::vector< gspan::LabelData * > _edges_
The vector of edges in graph, in decreasing order of interest.
Definition gspan.h:247
GSpan(const PRM< GUM_SCALAR > &prm, const PRMSystem< GUM_SCALAR > &sys, gspan::SearchStrategy< GUM_SCALAR > *strategy=0)
Default constructor.
Definition gspan_tpl.h:365
Size getMaxDFSDepth() const
Returns the maximal depth of the DFSTree used to discover new patterns.
Definition gspan_tpl.h:384
gspan::DFSTree< GUM_SCALAR > & tree()
Returns the DFSTree used to discover new patters.
Definition gspan_tpl.h:394
gspan::InterfaceGraph< GUM_SCALAR > & interfaceGraph()
Returns the InterfaceGraph used by this.
Definition gspan_tpl.h:431
Set< Sequence< PRMInstance< GUM_SCALAR > * > * > MatchedInstances
Code alias.
Definition gspan.h:185
bool _isEdgeEligible_(typename gspan::EdgeData< GUM_SCALAR > *e)
Returns true if e is an eligible root edge.
Definition gspan_tpl.h:441
Size _depth_stop_
The max depth allowed for the DSF tree.
Definition gspan.h:238
void _sortNodesAndEdges_()
Sort the nodes and edges of graph.
Definition gspan_tpl.h:78
Size _cost_func_(Size interface_size, Size frequency)
Returns the cost with respect to an interface size and its frequency.
Definition gspan_tpl.h:404
void _sortPatterns_()
Sort the patterns and compute their respective costs.
Definition gspan_tpl.h:240
An PRMInstance is a Bayesian network fragment defined by a Class and used in a PRMSystem.
Definition PRMInstance.h:79
A PRMSystem is a container of PRMInstance and describe a relational skeleton.
Definition PRMSystem.h:70
This class represents a Probabilistic Relational PRMSystem<GUM_SCALAR>.
Definition PRM.h:74
std::vector< EdgeCode * > codes
The vector containing the EdgeCode composing this DFSCode.
Definition DFSCode.h:109
A DFSTree is used by gspan to sort lexicographically patterns discovered in an interface graph.
Definition DFSTree.h:77
Inner class to handle data about edges in graph.
PRMInstance< GUM_SCALAR > * u
One of the two instance represented by this edge.
LabelData * l_u
The label data of u.
LabelData * l
The labal data of this edge.
LabelData * l_v
The label data of v.
This class is used to define an edge growth of a pattern in this DFSTree.
Definition edgeGrowth.h:73
std::string toString()
Return a string representation of this.
void insert(PRMInstance< GUM_SCALAR > *u, PRMInstance< GUM_SCALAR > *v)
Add the pair (u,v) as a match for the current growth.
This class represent the interface graph of a given gum::prm::PRMSystem<GUM_SCALAR>.
NodeId id(const PRMInstance< GUM_SCALAR > &i) const
Returns the id of i in this interface graph.
EdgeData< GUM_SCALAR > & edge(NodeId u, NodeId v)
Returns data about an edge.
UndiGraph & graph()
Returns the graph of this interface graph.
NodeData< GUM_SCALAR > & node(const PRMInstance< GUM_SCALAR > *i)
Returns data about a node.
This contains all the information we want for a node in a DFSTree.
Definition pattern.h:90
void rightmostPath(std::list< NodeId > &r_path) const
Fill r_path with the rightmost path of this Pattern. The list is supposed empty.
DFSCode & code()
Returns the DFSCode of this Pattern.
This is an abstract class used to tune search strategies in the gspan algorithm.
Headers of the DFSTree class.
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition types.h:74
Size Idx
Type for indexes.
Definition types.h:79
Size NodeId
Type for node ids.
Headers of gspan.
namespace for all probabilistic relational models entities
Definition agrum.h:68
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
Private class used to sort LabelData using STL sort algorithms.
Definition gspan.h:300
LabelSort(GSpan *my_gspan)
Default constructor.
Definition gspan_tpl.h:449
GSpan * gspan
A pointer over an instance of the GSpan class using this class.
Definition gspan.h:327
bool operator()(gspan::LabelData *i, gspan::LabelData *j)
Returns true if i's cost is lesser than j's.
Definition gspan_tpl.h:465
Private class used to sort Pattern using STL sort algorithms.
Definition gspan.h:335
bool operator()(gspan::Pattern *i, gspan::Pattern *j)
Returns true if i's cost is lesser than j's.
Definition gspan_tpl.h:491
GSpan * gspan
A pointer over an instance of the GSpan class using this class.
Definition gspan.h:362
PatternSort(GSpan *my_gspan)
Default constructor.
Definition gspan_tpl.h:475
This is used to generate the max_indep_set of a Pattern.
Definition DFSTree.h:250
Inner class to handle data about labels in this interface graph.