51#ifndef DOXYGEN_SHOULD_SKIP_THIS
80 const std::vector< NodeId >& conditioning_nodes,
81 const bool compute_log_likelihood) {
84 IdCondSet idset(target_node, conditioning_nodes,
true);
89 const std::vector< double >& original_N_ijk = this->
counter_.counts(idset,
true);
90 std::vector< double > N_ijk = original_N_ijk;
91 const bool informative_external_prior = this->
external_prior_->isInformative();
95 if (informative_external_prior) this->
external_prior_->addJointPseudoCount(idset, N_ijk);
96 if (informative_score_internal_prior)
98 double log_likelihood = 0.0;
104 if (!conditioning_nodes.empty()) {
107 std::vector< double > N_ij(this->
counter_.counts(idset.conditionalIdCondSet(),
false));
108 if (informative_external_prior)
110 if (informative_score_internal_prior)
113 const std::size_t conditioning_domsize = N_ij.size();
114 const std::size_t target_domsize = N_ijk.size() / conditioning_domsize;
117 for (std::size_t j = std::size_t(0); j < conditioning_domsize; ++j) {
120 const std::size_t cond_nb = conditioning_nodes.size();
121 std::vector< Idx > cond_domsize(cond_nb);
123 const auto& node2cols = this->
counter_.nodeId2Columns();
125 if (node2cols.empty()) {
126 for (std::size_t i = std::size_t(0); i < cond_nb; ++i) {
127 cond_domsize[i] =
database.domainSize(conditioning_nodes[i]);
130 for (std::size_t i = std::size_t(0); i < cond_nb; ++i) {
131 cond_domsize[i] =
database.domainSize(node2cols.second(conditioning_nodes[i]));
136 std::vector< Idx > offsets(cond_nb);
139 for (i = std::size_t(0); i < cond_nb; ++i) {
141 offset *= cond_domsize[i];
143 std::vector< Idx > values(cond_nb);
146 for (
Idx jj = cond_nb - 1; i < cond_nb; ++i, --jj) {
147 values[jj] = offset / offsets[jj];
148 offset %= offsets[jj];
152 std::stringstream str;
153 str <<
"The conditioning set <";
155 for (i = std::size_t(0); i < cond_nb; ++i) {
156 if (deja) str <<
", ";
158 std::size_t col = node2cols.empty() ? conditioning_nodes[i]
159 : node2cols.second(conditioning_nodes[i]);
160 const DiscreteVariable& var
161 =
dynamic_cast< const DiscreteVariable&
>(
database.variable(col));
162 str << var.name() <<
"=" << var.labels()[values[i]];
164 auto target_col = node2cols.empty() ? target_node : node2cols.second(target_node);
165 const Variable& var =
database.variable(target_col);
166 str <<
"> for target node " << var.name()
167 <<
" never appears in the database. Please consider using "
168 <<
"priors such as smoothing.";
175 if (compute_log_likelihood) {
176 for (std::size_t j = std::size_t(0), k = std::size_t(0); j < conditioning_domsize; ++j) {
177 for (std::size_t i = std::size_t(0); i < target_domsize; ++i, ++k) {
179 if (original_N_ijk[k]) { log_likelihood += original_N_ijk[k] * std::log(N_ijk[k]); }
183 for (std::size_t j = std::size_t(0), k = std::size_t(0); j < conditioning_domsize; ++j) {
184 for (std::size_t i = std::size_t(0); i < target_domsize; ++i, ++k) {
194 for (
const double n_ijk: N_ijk)
198 if (compute_log_likelihood) {
199 for (std::size_t k = std::size_t(0), end = N_ijk.size(); k < end; ++k) {
201 if (original_N_ijk[k]) { log_likelihood += original_N_ijk[k] * std::log(N_ijk[k]); }
204 for (
double& n_ijk: N_ijk)
208 std::stringstream str;
210 const auto& node2cols = this->
counter_.nodeId2Columns();
212 auto target_col = node2cols.empty() ? target_node : node2cols.second(target_node);
213 const Variable& var =
database.variable(target_col);
214 str <<
"No data for target node " << var.name()
215 <<
". It is impossible to estimate the parameters by maximum "
221 return {std::move(N_ijk), log_likelihood};
The class for estimating parameters of CPTs using Maximum Likelihood.
ParamEstimatorML & operator=(const ParamEstimatorML &from)
copy operator
virtual ~ParamEstimatorML()
destructor
std::pair< std::vector< double >, double > _parametersAndLogLikelihood_(const NodeId target_node, const std::vector< NodeId > &conditioning_nodes, const bool compute_log_likelihood)
ParamEstimatorML(const DBRowGeneratorParser &parser, const Prior &external_prior, const Prior &_score_internal_prior, const std::vector< std::pair< std::size_t, std::size_t > > &ranges, const Bijection< NodeId, std::size_t > &nodeId2columns=Bijection< NodeId, std::size_t >())
default constructor
RecordCounter counter_
the record counter used to parse the database
ParamEstimator & operator=(const ParamEstimator &from)
copy operator
Prior * score_internal_prior_
if a score was used for learning the structure of the PGM, this is the prior internal to that score
const DatabaseTable & database() const
returns the database on which we perform the counts
Prior * external_prior_
an external prior
#define GUM_ERROR(type, msg)
Size Idx
Type for indexes.
Size NodeId
Type for node ids.
include the inlined functions if necessary
gum is the global namespace for all aGrUM entities
the class for estimating parameters of CPTs using Maximum Likelihood
the class for estimating parameters of CPTs using Maximum Likelihood