51#ifndef DOXYGEN_SHOULD_SKIP_THIS
83 std::vector< double > N_xyz(this->
counter_.counts(idset,
true));
84 const bool informative_external_prior = this->
prior_->isInformative();
85 if (informative_external_prior) this->
prior_->addJointPseudoCount(idset, N_xyz);
86 const std::size_t all_size = (N_xyz.size());
89 const auto& nodeId2cols = this->
counter_.nodeId2Columns();
92 if (nodeId2cols.empty()) {
96 var_x = nodeId2cols.second(idset[0]);
97 var_y = nodeId2cols.second(idset[1]);
100 const std::size_t X_size =
database.domainSize(var_x);
101 const std::size_t Y_size =
database.domainSize(var_y);
103 double cumulStat = 0.0;
107 if (idset.hasConditioningSet()) {
108 const std::size_t Z_size = all_size / (X_size * Y_size);
111 std::vector< double > N_xz
112 = this->
marginalize_(std::size_t(1), X_size, Y_size, Z_size, N_xyz);
113 std::vector< double > N_yz
114 = this->
marginalize_(std::size_t(0), X_size, Y_size, Z_size, N_xyz);
115 std::vector< double > N_z
116 = this->
marginalize_(std::size_t(2), X_size, Y_size, Z_size, N_xyz);
119 std::vector< Idx > cond_nodes;
120 cond_nodes.reserve(idset.nbRHSIds());
122 const auto cond_idset = idset.conditionalIdCondSet().ids();
123 if (nodeId2cols.empty()) {
124 for (
const auto node: cond_idset)
125 cond_nodes.push_back(node);
127 for (
const auto node: cond_idset)
128 cond_nodes.push_back(nodeId2cols.second(node));
131 _chi2_.setConditioningNodes(cond_nodes);
136 for (std::size_t z = std::size_t(0),
137 beg_xz = std::size_t(0),
138 beg_yz = std::size_t(0),
139 xyz = std::size_t(0);
141 ++z, beg_xz += X_size, beg_yz += Y_size) {
143 for (std::size_t y = std::size_t(0), yz = beg_yz; y < Y_size; ++yz, ++y) {
144 for (std::size_t x = std::size_t(0), xz = beg_xz; x < X_size; ++xz, ++x, ++xyz) {
145 const double tmp1 = N_xyz[xyz] * N_z[z];
146 const double tmp2 = N_yz[yz] * N_xz[xz];
147 if ((tmp1 != 0.0) && (tmp2 != 0.0)) {
148 cumulStat += N_xyz[xyz] * std::log(tmp1 / tmp2);
153 xyz += X_size * Y_size;
160 _chi2_.setConditioningNodes(_empty_set_);
165 std::vector< double > N_x
166 = this->
marginalize_(std::size_t(1), X_size, Y_size, std::size_t(1), N_xyz);
167 std::vector< double > N_y
168 = this->
marginalize_(std::size_t(0), X_size, Y_size, std::size_t(1), N_xyz);
175 for (std::size_t y = std::size_t(0), xy = 0; y < Y_size; ++y) {
176 const double tmp_Ny = N_y[y];
177 for (std::size_t x = 0; x < X_size; ++x, ++xy) {
178 const double tmp = (tmp_Ny * N_x[x]);
179 if ((tmp != 0.0) && (N_xyz[xy] != 0.0)) {
180 cumulStat += N_xyz[xy] * std::log((N_xyz[xy] * N) / tmp);
190 Size df = _chi2_.degreesOfFreedom(var_x, var_y);
191 double pValue = _chi2_.probaChi2(cumulStat, df);
192 return std::pair< double, double >(cumulStat, pValue);
198 const auto& nodeId2cols = this->
counter_.nodeId2Columns();
200 if (nodeId2cols.empty()) {
204 var_x = nodeId2cols.second(idset[0]);
205 var_y = nodeId2cols.second(idset[1]);
209 double score = stat.first;
215 const double alpha = _chi2_.criticalValue(var_x, var_y);
A class for storing a pair of sets of NodeIds, the second one corresponding to a conditional set.
the class for computing G2 independence test scores
IndepTestG2 & operator=(const IndepTestG2 &from)
copy operator
virtual double score_(const IdCondSet &idset) final
returns the score for a given IdCondSet
std::pair< double, double > statistics_(const IdCondSet &idset)
compute the pair <G2 statistic,pvalue>
std::vector< double > marginalize_(const std::size_t node_2_marginalize, const std::size_t X_size, const std::size_t Y_size, const std::size_t Z_size, const std::vector< double > &N_xyz) const
returns a counting vector where variables are marginalized from N_xyz
double score(const NodeId var1, const NodeId var2)
returns the score of a pair of nodes
RecordCounter counter_
the record counter used for the counts over discrete variables
IndependenceTest & operator=(const IndependenceTest &from)
copy operator
Prior * prior_
the expert knowledge a priori we add to the contingency tables
const DatabaseTable & database() const
return the database used by the score
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Size Idx
Type for indexes.
the class for computing G2 scores
the class for computing G2 scores
include the inlined functions if necessary
gum is the global namespace for all aGrUM entities