aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
independenceTest.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
47#ifndef GUM_LEARNING_INDEPENDENCE_TEST_H
48#define GUM_LEARNING_INDEPENDENCE_TEST_H
49
50#include <utility>
51
52#include <agrum/agrum.h>
53
58
59namespace gum {
60
61 namespace learning {
62
69 public:
70 // ##########################################################################
72 // ##########################################################################
74
76
97 const Prior& external_prior,
98 const std::vector< std::pair< std::size_t, std::size_t > >& ranges,
99 const Bijection< NodeId, std::size_t >& nodeId2columns
101
102
104
119 const Prior& external_prior,
120 const Bijection< NodeId, std::size_t >& nodeId2columns
122
124 virtual IndependenceTest* clone() const = 0;
125
128
130
131
132 // ##########################################################################
134 // ##########################################################################
136
138
142 virtual void setNumberOfThreads(Size nb);
143
145 virtual Size getNumberOfThreads() const;
146
148 virtual bool isGumNumberOfThreadsOverriden() const;
149
159 virtual void setMinNbRowsPerThread(const std::size_t nb) const;
160
162 virtual std::size_t minNbRowsPerThread() const;
163
165
171 void setRanges(const std::vector< std::pair< std::size_t, std::size_t > >& new_ranges);
172
175
177 const std::vector< std::pair< std::size_t, std::size_t > >& ranges() const;
178
179
181 double score(const NodeId var1, const NodeId var2);
182
184
188 double score(const NodeId var1, const NodeId var2, const std::vector< NodeId >& rhs_ids);
189
191 virtual void clear();
192
194 virtual void clearCache();
195
197 virtual void useCache(const bool on_off);
198
200
204
206 const DatabaseTable& database() const;
207
209
210
211 protected:
213 const double one_log2_{M_LOG2E};
214
216 Prior* prior_{nullptr};
217
220
223
225 bool use_cache_{true};
226
228 const std::vector< NodeId > empty_ids_;
229
230
233
236
239
242
244
247 virtual double score_(const IdCondSet& idset) = 0;
248
250
259 std::vector< double > marginalize_(const std::size_t node_2_marginalize,
260 const std::size_t X_size,
261 const std::size_t Y_size,
262 const std::size_t Z_size,
263 const std::vector< double >& N_xyz) const;
264 };
265
266 } /* namespace learning */
267
268} /* namespace gum */
269
270// include the inlined functions if necessary
271#ifndef GUM_NO_INLINE
273#endif /* GUM_NO_INLINE */
274
275#endif /* GUM_LEARNING_INDEPENDENCE_TEST_H */
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
The class representing a tabular database as used by learning tasks.
A class for storing a pair of sets of NodeIds, the second one corresponding to a conditional set.
Definition idCondSet.h:214
virtual void setMinNbRowsPerThread(const std::size_t nb) const
changes the minimum number of rows a thread should process in a multithreading context
IndependenceTest(IndependenceTest &&from)
move constructor
IndependenceTest(const IndependenceTest &from)
copy constructor
virtual void useCache(const bool on_off)
turns on/off the use of a cache of the previously computed scores
double score(const NodeId var1, const NodeId var2, const std::vector< NodeId > &rhs_ids)
returns the score of a pair of nodes given some other nodes
IndependenceTest(const DBRowGeneratorParser &parser, const Prior &external_prior, const std::vector< std::pair< std::size_t, std::size_t > > &ranges, const Bijection< NodeId, std::size_t > &nodeId2columns=Bijection< NodeId, std::size_t >())
default constructor
const std::vector< NodeId > empty_ids_
an empty vector
std::vector< double > marginalize_(const std::size_t node_2_marginalize, const std::size_t X_size, const std::size_t Y_size, const std::size_t Z_size, const std::vector< double > &N_xyz) const
returns a counting vector where variables are marginalized from N_xyz
void setRanges(const std::vector< std::pair< std::size_t, std::size_t > > &new_ranges)
sets new ranges to perform the counts used by the independence test
double score(const NodeId var1, const NodeId var2)
returns the score of a pair of nodes
const double one_log2_
1 / log(2)
virtual std::size_t minNbRowsPerThread() const
returns the minimum number of rows that each thread should process
virtual double score_(const IdCondSet &idset)=0
returns the score for a given IdCondSet
RecordCounter counter_
the record counter used for the counts over discrete variables
IndependenceTest & operator=(IndependenceTest &&from)
move assignment operator
IndependenceTest & operator=(const IndependenceTest &from)
copy assignment operator
virtual void clear()
clears all the data structures from memory, including the cache
Prior * prior_
the expert knowledge a priori that we add to the contingency tables
virtual ~IndependenceTest()
destructor
virtual Size getNumberOfThreads() const
returns the current max number of threads of the scheduler
bool use_cache_
a Boolean indicating whether we wish to use the cache
void clearRanges()
resets the ranges to the single range corresponding to the whole database
virtual void clearCache()
clears the current cache
virtual void setNumberOfThreads(Size nb)
sets the maximum number of threads that can be used
const DatabaseTable & database() const
returns the database used by the independence test
ScoringCache cache_
the scoring cache
const std::vector< std::pair< std::size_t, std::size_t > > & ranges() const
returns the current ranges
virtual IndependenceTest * clone() const =0
virtual copy constructor
virtual bool isGumNumberOfThreadsOverriden() const
indicates whether the user explicitly set the number of threads
const Bijection< NodeId, std::size_t > & nodeId2Columns() const
returns the mapping between the columns of the database and the node ids
IndependenceTest(const DBRowGeneratorParser &parser, const Prior &external_prior, const Bijection< NodeId, std::size_t > &nodeId2columns=Bijection< NodeId, std::size_t >())
default constructor
the base class for all priors
Definition prior.h:83
The class that computes counting of observations from the database.
a cache for caching scores and independence tests results
the classes to account for structure changes in a graph
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition types.h:74
Size NodeId
Type for node ids.
the base class for all the independence tests used for learning
#define M_LOG2E
Definition math_utils.h:55
include the inlined functions if necessary
Definition CSVParser.h:54
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
the base class for all priors
The class that computes counting of observations from the database.
a cache for caching scores and independence tests results