aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
kNML.cpp
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
48
50
51#ifndef DOXYGEN_SHOULD_SKIP_THIS
52
54# ifdef GUM_NO_INLINE
56# endif /* GUM_NO_INLINE */
57
58namespace gum {
59
60 namespace learning {
61
63 KNML& KNML::operator=(const KNML& from) {
64 if (this != &from) {
66 _param_complexity_ = from._param_complexity_;
67 }
68 return *this;
69 }
70
72 KNML& KNML::operator=(KNML&& from) {
73 if (this != &from) {
74 IndependenceTest::operator=(std::move(from));
75 _param_complexity_ = std::move(from._param_complexity_);
76 }
77 return *this;
78 }
79
81 void KNML::clear() {
83 _param_complexity_.clearCache();
84 }
85
87 void KNML::clearCache() {
89 _param_complexity_.clearCache();
90 }
91
93 void KNML::useCache(const bool on_off) {
95 _param_complexity_.useCache(on_off);
96 }
97
99 double KNML::score_(const IdCondSet& idset) {
100 // perform the counts on the database for all the nodes in the idset
101 // This will help optimizing the computations of the Nxui, Nyui and Nui
102 // that we will be needed subsequently
103 this->counter_.counts(idset, true);
104
105 const bool informative_external_prior = this->prior_->isInformative();
106
107 // get the domain sizes of X and Y
108 const auto& db = this->database();
109 const auto& node2cols = this->nodeId2Columns();
110 std::size_t r_x, r_y;
111 if (!node2cols.empty()) {
112 r_x = db.domainSize(node2cols.second(idset[0]));
113 r_y = db.domainSize(node2cols.second(idset[1]));
114 } else {
115 r_x = db.domainSize(idset[0]);
116 r_y = db.domainSize(idset[1]);
117 }
118
119
120 // here, we distinguish idsets with conditioning nodes from those
121 // without conditioning nodes
122 if (idset.hasConditioningSet()) {
123 // now get the Nxui, Nyui and Nui
124 IdCondSet idset_xui = idset;
125 idset_xui.erase(idset[1]);
126 IdCondSet idset_yui = idset;
127 idset_yui.erase(idset[0]);
128
129 std::vector< double > N_ui = this->counter_.counts(idset.conditionalIdCondSet(), false);
130 std::vector< double > N_xui = this->counter_.counts(idset_xui, false);
131 std::vector< double > N_yui = this->counter_.counts(idset_yui, false);
132
133 if (informative_external_prior) {
134 this->prior_->addConditioningPseudoCount(idset, N_ui);
135 this->prior_->addJointPseudoCount(idset, N_xui);
136 this->prior_->addJointPseudoCount(idset, N_yui);
137 }
138
139
140 // the value of kNML is equal to:
141 // 0.5 * sum_Z ( sum_X( log( C^(r_y)_#ZX ) ) - log( C^(r_y)_#Z ) +
142 // sum_Y( log( C^(r_x)_#ZY ) ) - log( C^(r_x)_#Z ) )
143 double score = 0.0;
144 for (auto n_xui: N_xui)
145 score += _param_complexity_.log2Cnr(r_y, n_xui);
146 for (auto n_yui: N_yui)
147 score += _param_complexity_.log2Cnr(r_x, n_yui);
148 for (auto n_ui: N_ui) {
149 score -= _param_complexity_.log2Cnr(r_y, n_ui);
150 score -= _param_complexity_.log2Cnr(r_x, n_ui);
151 }
152
153 score *= 0.5;
154
155 return score;
156 } else {
157 // here, there is no conditioning set
158 // now get the Nxui, Nyui and Nui
159 IdCondSet idset_xui(idset[0], this->empty_ids_, true);
160 IdCondSet idset_yui(idset[1], this->empty_ids_, true);
161
162 std::vector< double > N_xui = this->counter_.counts(idset_xui, false);
163 std::vector< double > N_yui = this->counter_.counts(idset_yui, false);
164
165 if (informative_external_prior) {
166 this->prior_->addJointPseudoCount(idset, N_xui);
167 this->prior_->addJointPseudoCount(idset, N_yui);
168 }
169
170
171 // so, the formula for kNML is:
172 // 0.5 * ( sum_X( log( C^(r_y)_#X ) ) - log( C^(r_y)N_ ) +
173 // sum_Y( log( C^(r_x)_#Y ) ) - log( C^(r_x)N_ ) )
174 double N = 0.0;
175 double score = 0.0;
176 for (auto n_xui: N_xui) {
177 score += _param_complexity_.log2Cnr(r_y, n_xui);
178 N += n_xui;
179 }
180 for (auto n_yui: N_yui)
181 score += _param_complexity_.log2Cnr(r_x, n_yui);
182 score -= _param_complexity_.log2Cnr(r_y, N);
183 score -= _param_complexity_.log2Cnr(r_x, N);
184
185 score *= 0.5;
186
187 return score;
188 }
189 }
190
191 } /* namespace learning */
192
193} /* namespace gum */
194
195#endif /* DOXYGEN_SHOULD_SKIP_THIS */
A class for storing a pair of sets of NodeIds, the second one corresponding to a conditional set.
Definition idCondSet.h:214
virtual void useCache(const bool on_off)
turn on/off the use of a cache of the previously computed score
const std::vector< NodeId > empty_ids_
an empty vector
RecordCounter counter_
the record counter used for the counts over discrete variables
IndependenceTest & operator=(const IndependenceTest &from)
copy operator
virtual void clear()
clears all the data structures from memory, including the cache
Prior * prior_
the expert knowledge (a priori) that we add to the contingency tables
virtual void clearCache()
clears the current cache
the class for computing the NML penalty used by MIIC
Definition kNML.h:67
KNML & operator=(const KNML &from)
copy operator
double score(const NodeId var1, const NodeId var2)
the scores
virtual double score_(const IdCondSet &idset) final
returns the score for a given IdCondSet
virtual void clear()
clears all the data structures from memory, including the C_n^r cache
const DatabaseTable & database() const
return the database used by the score
virtual void useCache(const bool on_off)
turn on/off the use of the C_n^r cache
virtual void clearCache()
clears the current C_n^r cache
const Bijection< NodeId, std::size_t > & nodeId2Columns() const
return the mapping between the columns of the database and the node ids
The class for the NML penalty used in MIIC.
The class for the NML penalty used in MIIC.
include the inlined functions if necessary
Definition CSVParser.h:54
gum is the global namespace for all aGrUM entities
Definition agrum.h:46