aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
scoreBIC.cpp
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
48
50
51#ifndef DOXYGEN_SHOULD_SKIP_THIS
52
54# ifdef GUM_NO_INLINE
56# endif /* GUM_NO_INLINE */
57
58namespace gum {
59
60 namespace learning {
61
64 if (this != &from) {
65 Score::operator=(from);
66 _internal_prior_ = from._internal_prior_;
67 }
68 return *this;
69 }
70
73 if (this != &from) {
74 Score::operator=(std::move(from));
75 _internal_prior_ = std::move(from._internal_prior_);
76 }
77 return *this;
78 }
79
81 std::string ScoreBIC::isPriorCompatible(PriorType prior_type, double weight) {
82 // check that the prior is compatible with the score
83 if ((prior_type == PriorType::DirichletPriorType)
84 || (prior_type == PriorType::SmoothingPriorType)
85 || (prior_type == PriorType::NoPriorType)) {
86 return "";
87 }
88
89 // prior types unsupported by the type checker
90 std::stringstream msg;
91 msg << "The prior '" << priorTypeToString(prior_type)
92 << "' is not yet compatible with the score 'BIC'.";
93 return msg.str();
94 }
95
97 double ScoreBIC::score_(const IdCondSet& idset) {
98 // get the counts for all the nodes in the idset and add the prior
99 std::vector< double > N_ijk(this->counter_.counts(idset, true));
100 const bool informative_external_prior = this->prior_->isInformative();
101 if (informative_external_prior) this->prior_->addJointPseudoCount(idset, N_ijk);
102 const std::size_t all_size = N_ijk.size();
103
104 // here, we distinguish idsets with conditioning nodes from those
105 // without conditioning nodes
106 if (idset.hasConditioningSet()) {
107 // get the counts for the conditioning nodes
108 std::vector< double > N_ij(this->marginalize_(idset[0], N_ijk));
109 const std::size_t conditioning_size = N_ij.size();
110
111 // initialize the score: this should be the penalty of the BIC score,
112 // i.e., -(ri-1 ) * qi * .5 * log ( N + N' )
113 const std::size_t target_domsize = all_size / conditioning_size;
114 const double penalty = conditioning_size * double(target_domsize - std::size_t(1));
115
116 // compute the score: it remains to compute the log likelihood, i.e.,
117 // sum_k=1^r_i sum_j=1^q_i N_ijk log (N_ijk / N_ij), which is also
118 // equivalent to:
119 // sum_j=1^q_i sum_k=1^r_i N_ijk log N_ijk - sum_j=1^q_i N_ij log N_ij
120 double score = 0.0;
121 for (const auto n_ijk: N_ijk) {
122 if (n_ijk) { score += n_ijk * std::log(n_ijk); }
123 }
124 double N = 0;
125 for (const auto n_ij: N_ij) {
126 if (n_ij) {
127 score -= n_ij * std::log(n_ij);
128 N += n_ij;
129 }
130 }
131
132 // finally, remove the penalty
133 score -= penalty * std::log(N) * 0.5;
134
135 // divide by log(2), since the log likelihood uses log_2
136 score *= this->one_log2_;
137
138 return score;
139 } else {
140 // here, there are no conditioning nodes
141
142 // initialize the score: this should be the penalty of the BIC score,
143 // i.e., -(ri-1 )
144 const double penalty = double(all_size - std::size_t(1));
145
146 // compute the score: it remains to compute the log likelihood, i.e.,
147 // sum_k=1^r_i N_ijk log (N_ijk / N), which is also
148 // equivalent to:
149 // sum_j=1^q_i sum_k=1^r_i N_ijk log N_ijk - N log N
150 double N = 0.0;
151 double score = 0.0;
152 for (const auto n_ijk: N_ijk) {
153 if (n_ijk) {
154 score += n_ijk * std::log(n_ijk);
155 N += n_ijk;
156 }
157 }
158 score -= N * std::log(N);
159
160 // finally, remove the penalty
161 score -= penalty * std::log(N) * 0.5;
162
163 // divide by log(2), since the log likelihood uses log_2
164 score *= this->one_log2_;
165
166 return score;
167 }
168 }
169
171 double ScoreBIC::N(const IdCondSet& idset) {
172 // get the counts for all the nodes in the idset and add the prior
173 std::vector< double > N_ijk(this->counter_.counts(idset, true));
174 if (this->prior_->isInformative()) this->prior_->addJointPseudoCount(idset, N_ijk);
175
176 double N = 0;
177 for (const auto n_ijk: N_ijk) {
178 N += n_ijk;
179 }
180
181 return N;
182 }
183
184 } /* namespace learning */
185
186} /* namespace gum */
187
188#endif /* DOXYGEN_SHOULD_SKIP_THIS */
A class for storing a pair of sets of NodeIds, the second one corresponding to a conditional set.
Definition idCondSet.h:214
the class for computing BIC scores
Definition scoreBIC.h:71
double N(const IdCondSet &idset)
returns the size of the database w.r.t. a given idset
ScoreBIC & operator=(const ScoreBIC &from)
copy operator
virtual double score_(const IdCondSet &idset) final
returns the score for a given IdCondSet
virtual std::string isPriorCompatible() const final
indicates whether the prior is compatible (meaningful) with the score
Prior * prior_
the expert knowledge a priori we add to the score
Definition score.h:238
double score(const NodeId var)
returns the score of a single node
Score & operator=(const Score &from)
copy operator
RecordCounter counter_
the record counter used for the counts over discrete variables
Definition score.h:241
const double one_log2_
1 / log(2)
Definition score.h:235
std::vector< double > marginalize_(const NodeId X_id, const std::vector< double > &N_xyz) const
returns a counting vector where variables are marginalized from N_xyz
include the inlined functions if necessary
Definition CSVParser.h:54
constexpr const char * priorTypeToString(PriorType e) noexcept
Definition prior.h:68
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
the class for computing BIC scores
the class for computing BIC scores