aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
IBNLearner.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
52#ifndef GUM_LEARNING_GENERIC_BN_LEARNER_H
53#define GUM_LEARNING_GENERIC_BN_LEARNER_H
54
55#include <memory>
56#include <sstream>
57
58#include <agrum/agrum.h>
59
82
87
88namespace gum::learning {
90
99 public:
101 enum class ScoreType { AIC, BD, BDeu, BIC, K2, LOG2LIKELIHOOD };
102
105 enum class ParamEstimatorType { ML };
106
115
118
120 static constexpr double default_EM_noise{0.1};
121
123 class Database {
124 public:
125 // ########################################################################
127 // ########################################################################
129
131
141 explicit Database(const std::string& file,
142 const std::vector< std::string >& missing_symbols,
143 const bool induceTypes = false);
144
146
149 explicit Database(const DatabaseTable& db);
150
152
162 Database(const std::string& filename,
163 const Database& score_database,
164 const std::vector< std::string >& missing_symbols);
165
167
173 template < typename GUM_SCALAR >
174 Database(const std::string& filename,
176 const std::vector< std::string >& missing_symbols);
177
179 Database(const Database& from);
180
182 Database(Database&& from);
183
185 ~Database();
186
188
189 // ########################################################################
191 // ########################################################################
193
195 Database& operator=(const Database& from);
196
198 Database& operator=(Database&& from);
199
201
202 // ########################################################################
204 // ########################################################################
206
209
211 const std::vector< std::size_t >& domainSizes() const;
212
214 const std::vector< std::string >& names() const;
215
217 NodeId idFromName(const std::string& var_name) const;
218
220 const std::string& nameFromId(NodeId id) const;
221
223 const DatabaseTable& databaseTable() const;
224
227 void setDatabaseWeight(const double new_weight);
228
231
233 const std::vector< std::string >& missingSymbols() const;
234
236 std::size_t nbRows() const;
237
239 std::size_t size() const;
240
242
245 void setWeight(const std::size_t i, const double weight);
246
248
250 double weight(const std::size_t i) const;
251
253 double weight() const;
254
256
257 protected:
260
263
265 std::vector< std::size_t > _domain_sizes_;
266
269
272
275
276 private:
277 // returns the set of variables as a BN. This is convenient for
278 // the constructors of prior Databases
279 template < typename GUM_SCALAR >
280 BayesNet< GUM_SCALAR > _BNVars_() const;
281 };
282
283 // ##########################################################################
285 // ##########################################################################
287
300 IBNLearner(const std::string& filename,
301 const std::vector< std::string >& missingSymbols,
302 bool induceTypes = true);
303
304 explicit IBNLearner(const DatabaseTable& db);
305
323 template < typename GUM_SCALAR >
324 IBNLearner(const std::string& filename,
326 const std::vector< std::string >& missing_symbols);
327
329 IBNLearner(const IBNLearner&);
330
333
335 virtual ~IBNLearner();
336
338
339 // ##########################################################################
341 // ##########################################################################
343
346
349
351
352 // ##########################################################################
354 // ##########################################################################
356
358 DAG learnDAG();
359
362 PDAG learnPDAG();
363
365 void setInitialDAG(const DAG&);
366
368 DAG initialDAG();
369
371 const std::vector< std::string >& names() const;
372
374 const std::vector< std::size_t >& domainSizes() const;
375 Size domainSize(NodeId var) const;
376 Size domainSize(const std::string& var) const;
377
379
383 NodeId idFromName(const std::string& var_name) const;
384
386 const DatabaseTable& database() const;
387
390 void setDatabaseWeight(const double new_weight);
391
393
396 void setRecordWeight(const std::size_t i, const double weight);
397
399
401 double recordWeight(const std::size_t i) const;
402
404 double databaseWeight() const;
405
407 const std::string& nameFromId(NodeId id) const;
408
410
416 void useDatabaseRanges(const std::vector< std::pair< std::size_t, std::size_t > >& new_ranges);
417
419 void clearDatabaseRanges();
420
422
425 const std::vector< std::pair< std::size_t, std::size_t > >& databaseRanges() const;
426
428
448 std::pair< std::size_t, std::size_t > useCrossValidationFold(const std::size_t learning_fold,
449 const std::size_t k_fold);
450
451
459 std::pair< double, double >
460 chi2(NodeId id1, NodeId id2, const std::vector< NodeId >& knowing = {});
468 std::pair< double, double > chi2(const std::string& name1,
469 const std::string& name2,
470 const std::vector< std::string >& knowing = {});
471
479 std::pair< double, double >
480 G2(NodeId id1, NodeId id2, const std::vector< NodeId >& knowing = {});
488 std::pair< double, double > G2(const std::string& name1,
489 const std::string& name2,
490 const std::vector< std::string >& knowing = {});
491
499 double logLikelihood(const std::vector< NodeId >& vars,
500 const std::vector< NodeId >& knowing = {});
501
509 double logLikelihood(const std::vector< std::string >& vars,
510 const std::vector< std::string >& knowing = {});
511
523 double mutualInformation(NodeId id1, NodeId id2, const std::vector< NodeId >& knowing = {});
524
536 double mutualInformation(const std::string& var1,
537 const std::string& var2,
538 const std::vector< std::string >& knowing = {});
539
540
554 NodeId id2,
555 const std::vector< NodeId >& knowing = {});
556
571 double correctedMutualInformation(const std::string& var1,
572 const std::string& var2,
573 const std::vector< std::string >& knowing = {});
574
583 double score(NodeId vars, const std::vector< NodeId >& knowing = {});
584
594 double score(const std::string& vars, const std::vector< std::string >& knowing = {});
595
601 std::vector< double > rawPseudoCount(const std::vector< NodeId >& vars);
602
608 std::vector< double > rawPseudoCount(const std::vector< std::string >& vars);
613 Size nbCols() const;
614
619 Size nbRows() const;
620
638 void useEM(const double epsilon, const double noise = default_EM_noise);
639
654 void useEMWithRateCriterion(const double epsilon, const double noise = default_EM_noise);
655
669 void useEMWithDiffCriterion(const double epsilon, const double noise = default_EM_noise);
670
672 void forbidEM();
673
675 bool isUsingEM() const;
676
686 EMApproximationScheme& EM();
687
690
692 std::string EMStateMessage() const;
693
695 bool hasMissingValues() const;
696
698
699 // ##########################################################################
701 // ##########################################################################
703
705 void useScoreAIC();
706
708 void useScoreBD();
709
711 void useScoreBDeu();
712
714 void useScoreBIC();
715
717 void useScoreK2();
718
721
723
724 // ##########################################################################
726 // ##########################################################################
728
730 void useNoPrior();
731
733
736 void useBDeuPrior(double weight = 1.0);
737
739
742 void useSmoothingPrior(double weight = 1);
743
745 void useDirichletPrior(const std::string& filename, double weight = 1);
746
748
750 std::string checkScorePriorCompatibility() const;
752
753 // ##########################################################################
755 // ##########################################################################
757
760
762
765 void useLocalSearchWithTabuList(Size tabu_size = 100, Size nb_decrease = 2);
766
768 void useK2(const Sequence< NodeId >& order);
769
771 void useK2(const std::vector< NodeId >& order);
772
774 void useMIIC();
775
/// indicates whether the currently selected learning algorithm is
/// constraint-based (MIIC) as opposed to score-based (K2, local search, ...).
/// @throws OperationNotAllowed if selectedAlgo_ holds an unknown value
// NOTE(review): the case for AlgoType::GREEDY_HILL_CLIMBING (original line 780)
// is elided in this listing; presumably it also falls through to `return false`
// like K2 and LOCAL_SEARCH_WITH_TABU_LIST — confirm against the full header.
777 bool isConstraintBased() const {
778 switch (selectedAlgo_) {
779 case AlgoType::K2 :
781 case AlgoType::LOCAL_SEARCH_WITH_TABU_LIST : return false;
782 case AlgoType::MIIC : return true;
783 default : throw OperationNotAllowed("Unknown algorithm");
784 }
785 }
786
788 bool isScoreBased() const { return !isConstraintBased(); }
789
791
792 // ##########################################################################
794 // ##########################################################################
798 void useNMLCorrection();
801 void useMDLCorrection();
804 void useNoCorrection();
805
808 std::vector< Arc > latentVariables() const;
809
811 // ##########################################################################
813 // ##########################################################################
815
817 void setMaxIndegree(Size max_indegree);
818
824 void setSliceOrder(const NodeProperty< NodeId >& slice_order);
825
830 void setSliceOrder(const std::vector< std::vector< std::string > >& slices);
831
833 void setForbiddenArcs(const ArcSet& set);
834
837 void addForbiddenArc(const Arc& arc);
838 void addForbiddenArc(NodeId tail, NodeId head);
839 void addForbiddenArc(const std::string& tail, const std::string& head);
841
844 void eraseForbiddenArc(const Arc& arc);
845 void eraseForbiddenArc(NodeId tail, NodeId head);
846 void eraseForbiddenArc(const std::string& tail, const std::string& head);
848
850 void setMandatoryArcs(const ArcSet& set);
851
854 void addMandatoryArc(const Arc& arc);
855 void addMandatoryArc(NodeId tail, NodeId head);
856 void addMandatoryArc(const std::string& tail, const std::string& head);
858
861 void eraseMandatoryArc(const Arc& arc);
862 void eraseMandatoryArc(NodeId tail, NodeId head);
863 void eraseMandatoryArc(const std::string& tail, const std::string& head);
865
868 void addNoParentNode(NodeId node);
869 void addNoParentNode(const std::string& node);
871
874 void eraseNoParentNode(NodeId node);
875 void eraseNoParentNode(const std::string& node);
876
879 void addNoChildrenNode(NodeId node);
880 void addNoChildrenNode(const std::string& node);
882
885 void eraseNoChildrenNode(NodeId node);
886 void eraseNoChildrenNode(const std::string& node);
888
893 void setPossibleEdges(const EdgeSet& set);
894 void setPossibleSkeleton(const UndiGraph& skeleton);
896
900 // are considered as impossible.
902 void addPossibleEdge(const Edge& edge);
903 void addPossibleEdge(NodeId tail, NodeId head);
904 void addPossibleEdge(const std::string& tail, const std::string& head);
906
909 void erasePossibleEdge(const Edge& edge);
910 void erasePossibleEdge(NodeId tail, NodeId head);
911 void erasePossibleEdge(const std::string& tail, const std::string& head);
913
915
916 // ##########################################################################
918 // ##########################################################################
920
922
926 void setNumberOfThreads(Size nb) override;
927
929
930 protected:
932 void _setPriorWeight_(double weight);
933
935 bool inducedTypes_{false};
936
939
941 Score* score_{nullptr};
942
945
947 bool useEM_{false};
948
950 double noiseEM_{0.1};
951
954
957
959 Prior* prior_{nullptr};
960
961 NoPrior* noPrior_{nullptr};
962
964 double priorWeight_{1.0f};
965
968
971
974
977
980
983
986
989
990
993
996
999
1002
1006
1009
1012
1015
1018
1020 std::vector< std::pair< std::size_t, std::size_t > > ranges_;
1021
1024
1026 std::string priorDbname_;
1027
1030
1032 std::string filename_{"-"};
1033
1034 // size of the tabu list
1036
1037 // the current algorithm as an approximationScheme
1039
1041 static DatabaseTable readFile_(const std::string& filename,
1042 const std::vector< std::string >& missing_symbols);
1043
1045 static void isCSVFileName_(const std::string& filename);
1046
1048 virtual void createPrior_() = 0;
1049
1051 void createScore_();
1052
1055 bool take_into_account_score = true);
1056
1058 DAG learnDag_();
1059
1062
1065
1067 PriorType getPriorType_() const;
1068
1071
1072 public:
1073 // ##########################################################################
1076 // ##########################################################################
1077 // in order to not pollute the proper code of IBNLearner, we
1078 // directly
1079 // implement those
1080 // very simples methods here.
/// remembers which ApproximationScheme is currently driving the learning, so
/// that the getter methods below (epsilon(), maxIter(), history(), ...) can
/// be forwarded to it.
1082 INLINE void setCurrentApproximationScheme(const ApproximationScheme* approximationScheme) {
1083 currentAlgorithm_ = approximationScheme;
1084 }
1085
/// relays a progress signal coming from an inner algorithm to the listeners
/// registered on this learner's onProgress signaler.
/// @param approximationScheme the scheme emitting the signal (recorded as current)
/// @param pourcent progression in percent @param error current error
/// @param time elapsed time
1086 INLINE void distributeProgress(const ApproximationScheme* approximationScheme,
1087 Size pourcent,
1088 double error,
1089 double time) {
1090 setCurrentApproximationScheme(approximationScheme);
1091
1092 if (onProgress.hasListener()) GUM_EMIT3(onProgress, pourcent, error, time);
1093 };
1094
/// relays a stop signal (with its message) from an inner algorithm to the
/// listeners registered on this learner's onStop signaler.
1096 INLINE void distributeStop(const ApproximationScheme* approximationScheme,
1097 const std::string& message) {
1098 setCurrentApproximationScheme(approximationScheme);
1099
1100 if (onStop.hasListener()) GUM_EMIT1(onStop, message);
1101 };
1102
1104
/// sets the epsilon stopping criterion on every structure-learning algorithm
/// this learner may run (K2, greedy hill climbing, tabu-list local search).
// Note the broadcast pattern: setters push to all three algorithms, whereas
// the getters below read from currentAlgorithm_ (the last algorithm that ran)
// and raise FatalError if no algorithm has been selected/run yet.
1109 void setEpsilon(double eps) override {
1110 algoK2_.approximationScheme().setEpsilon(eps);
1111 greedyHillClimbing_.setEpsilon(eps);
1112 localSearchWithTabuList_.setEpsilon(eps);
1113 };
1114
/// returns the epsilon of the current algorithm
/// @throws FatalError if no algorithm has been chosen yet
1116 double epsilon() const override {
1117 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->epsilon();
1118 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1119 }
1120
/// disables the epsilon stopping criterion on all the learning algorithms
1122 void disableEpsilon() override {
1123 algoK2_.approximationScheme().disableEpsilon();
1124 greedyHillClimbing_.disableEpsilon();
1125 localSearchWithTabuList_.disableEpsilon();
1126 };
1127
/// enables the epsilon stopping criterion on all the learning algorithms
1129 void enableEpsilon() override {
1130 algoK2_.approximationScheme().enableEpsilon();
1131 greedyHillClimbing_.enableEpsilon();
1132 localSearchWithTabuList_.enableEpsilon();
1133 };
1134
/// indicates whether the current algorithm uses the epsilon criterion
/// @throws FatalError if no algorithm has been chosen yet
1137 bool isEnabledEpsilon() const override {
1138 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->isEnabledEpsilon();
1139 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1140 }
1141
1143
/// sets the minimal epsilon-rate stopping criterion (d/dt of the error) on
/// every structure-learning algorithm; same broadcast/forward pattern as the
/// epsilon family above.
1149 void setMinEpsilonRate(double rate) override {
1150 algoK2_.approximationScheme().setMinEpsilonRate(rate);
1151 greedyHillClimbing_.setMinEpsilonRate(rate);
1152 localSearchWithTabuList_.setMinEpsilonRate(rate);
1153 };
1154
/// returns the minimal epsilon rate of the current algorithm
/// @throws FatalError if no algorithm has been chosen yet
1156 double minEpsilonRate() const override {
1157 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->minEpsilonRate();
1158 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1159 }
1160
/// disables the epsilon-rate criterion on all the learning algorithms
1162 void disableMinEpsilonRate() override {
1163 algoK2_.approximationScheme().disableMinEpsilonRate();
1164 greedyHillClimbing_.disableMinEpsilonRate();
1165 localSearchWithTabuList_.disableMinEpsilonRate();
1166 };
1167
/// enables the epsilon-rate criterion on all the learning algorithms
1169 void enableMinEpsilonRate() override {
1170 algoK2_.approximationScheme().enableMinEpsilonRate();
1171 greedyHillClimbing_.enableMinEpsilonRate();
1172 localSearchWithTabuList_.enableMinEpsilonRate();
1173 };
1174
/// indicates whether the current algorithm uses the epsilon-rate criterion
/// @throws FatalError if no algorithm has been chosen yet
1177 bool isEnabledMinEpsilonRate() const override {
1178 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->isEnabledMinEpsilonRate();
1179 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1180 }
1181
1183
/// sets the maximum-iteration stopping criterion on every structure-learning
/// algorithm (K2, greedy hill climbing, tabu-list local search).
1189 void setMaxIter(Size max) override {
1190 algoK2_.approximationScheme().setMaxIter(max);
1191 greedyHillClimbing_.setMaxIter(max);
1192 localSearchWithTabuList_.setMaxIter(max);
1193 };
1194
/// returns the max number of iterations of the current algorithm
/// @throws FatalError if no algorithm has been chosen yet
1196 Size maxIter() const override {
1197 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->maxIter();
1198 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1199 }
1200
/// disables the max-iteration criterion on all the learning algorithms
1202 void disableMaxIter() override {
1203 algoK2_.approximationScheme().disableMaxIter();
1204 greedyHillClimbing_.disableMaxIter();
1205 localSearchWithTabuList_.disableMaxIter();
1206 };
1207
/// enables the max-iteration criterion on all the learning algorithms
1209 void enableMaxIter() override {
1210 algoK2_.approximationScheme().enableMaxIter();
1211 greedyHillClimbing_.enableMaxIter();
1212 localSearchWithTabuList_.enableMaxIter();
1213 };
1214
/// indicates whether the current algorithm uses the max-iteration criterion
/// @throws FatalError if no algorithm has been chosen yet
1217 bool isEnabledMaxIter() const override {
1218 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->isEnabledMaxIter();
1219 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1220 }
1221
1223
1228
/// sets the timeout stopping criterion on every structure-learning algorithm;
/// per maxTime() below, the timeout is expressed in seconds.
1230 void setMaxTime(double timeout) override {
1231 algoK2_.approximationScheme().setMaxTime(timeout);
1232 greedyHillClimbing_.setMaxTime(timeout);
1233 localSearchWithTabuList_.setMaxTime(timeout);
1234 }
1235
/// returns the timeout (in seconds) of the current algorithm
/// @throws FatalError if no algorithm has been chosen yet
1237 double maxTime() const override {
1238 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->maxTime();
1239 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1240 }
1241
/// returns the elapsed running time of the current algorithm
/// @throws FatalError if no algorithm has been chosen yet
1243 double currentTime() const override {
1244 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->currentTime();
1245 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1246 }
1247
/// disables the timeout criterion on all the learning algorithms
1249 void disableMaxTime() override {
1250 algoK2_.approximationScheme().disableMaxTime();
1251 greedyHillClimbing_.disableMaxTime();
1252 localSearchWithTabuList_.disableMaxTime();
1253 };
1254
/// enables the timeout criterion on all the learning algorithms
1255 void enableMaxTime() override {
1256 algoK2_.approximationScheme().enableMaxTime();
1257 greedyHillClimbing_.enableMaxTime();
1258 localSearchWithTabuList_.enableMaxTime();
1259 };
1260
/// indicates whether the current algorithm uses the timeout criterion
/// @throws FatalError if no algorithm has been chosen yet
1263 bool isEnabledMaxTime() const override {
1264 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->isEnabledMaxTime();
1265 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1266 }
1267
1269
/// sets the period size (number of iterations between two checks/recordings
/// of the approximation scheme) on every structure-learning algorithm.
1273 void setPeriodSize(Size p) override {
1274 algoK2_.approximationScheme().setPeriodSize(p);
1275 greedyHillClimbing_.setPeriodSize(p);
1276 localSearchWithTabuList_.setPeriodSize(p);
1277 };
1278
/// returns the period size of the current algorithm
/// @throws FatalError if no algorithm has been chosen yet
1279 Size periodSize() const override {
1280 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->periodSize();
1281 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1282 }
1283
1285
/// sets the verbosity flag on every structure-learning algorithm.
1288 void setVerbosity(bool v) override {
1289 algoK2_.approximationScheme().setVerbosity(v);
1290 greedyHillClimbing_.setVerbosity(v);
1291 localSearchWithTabuList_.setVerbosity(v);
1292 };
1293
/// returns the verbosity flag of the current algorithm
/// @throws FatalError if no algorithm has been chosen yet
1294 bool verbosity() const override {
1295 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->verbosity();
1296 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1297 }
1298
1300
1303
1305 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->stateApproximationScheme();
1306 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1307 }
1308
/// returns the number of iterations performed by the current algorithm
/// @throws FatalError if no algorithm has been chosen yet
1310 Size nbrIterations() const override {
1311 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->nbrIterations();
1312 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1313 }
1314
/// returns the history of recorded values of the current algorithm
/// @throws FatalError if no algorithm has been chosen yet
1316 const std::vector< double >& history() const override {
1317 if (currentAlgorithm_ != nullptr) return currentAlgorithm_->history();
1318 else GUM_ERROR(FatalError, "No chosen algorithm for learning")
1319 }
1320
1322
1323
1326
// EM-specific approximation-scheme accessors. Each method below is a pure
// forwarder to dag2BN_ (the DAG2BNLearner performing the parametric EM), so
// the EM stopping criteria can be tuned independently of the structure
// learning criteria handled above.
/// sets EM's min log-likelihood diff epsilon
1334 void EMsetEpsilon(double eps) { dag2BN_.setEpsilon(eps); }
1335
1337
/// returns EM's min log-likelihood diff epsilon
1342 double EMEpsilon() const { return dag2BN_.epsilon(); }
1343
/// disables EM's min-diff stopping criterion
1345 void EMdisableEpsilon() { dag2BN_.disableEpsilon(); }
1346
/// enables EM's min-diff stopping criterion
1351 void EMenableEpsilon() { dag2BN_.enableEpsilon(); }
1352
/// indicates whether EM's min-diff stopping criterion is enabled
1354 bool EMisEnabledEpsilon() const { return dag2BN_.isEnabledEpsilon(); }
1355
/// sets EM's minimal log-likelihood evolution-rate criterion
1362 void EMsetMinEpsilonRate(double rate) { dag2BN_.setMinEpsilonRate(rate); }
1363
/// returns EM's minimal log-likelihood evolution rate
1369 double EMMinEpsilonRate() const { return dag2BN_.minEpsilonRate(); }
1370
/// disables EM's evolution-rate stopping criterion
1372 void EMdisableMinEpsilonRate() { dag2BN_.disableMinEpsilonRate(); }
1373
/// enables EM's evolution-rate stopping criterion
1378 void EMenableMinEpsilonRate() { dag2BN_.enableMinEpsilonRate(); }
1379
/// indicates whether EM's evolution-rate criterion is enabled
1381 bool EMisEnabledMinEpsilonRate() const { return dag2BN_.isEnabledMinEpsilonRate(); }
1382
/// sets EM's max-iteration stopping criterion
1388 void EMsetMaxIter(Size max) { dag2BN_.setMaxIter(max); }
1389
/// returns EM's max number of iterations
1395 Size EMMaxIter() const { return dag2BN_.maxIter(); }
1396
/// disables EM's max-iteration stopping criterion
1398 void EMdisableMaxIter() { dag2BN_.disableMaxIter(); }
1399
/// enables EM's max-iteration stopping criterion
1401 void EMenableMaxIter() { dag2BN_.enableMaxIter(); }
1402
/// indicates whether EM's max-iteration criterion is enabled
1405 bool EMisEnabledMaxIter() const { return dag2BN_.isEnabledMaxIter(); }
1406
/// sets EM's timeout stopping criterion
1412 void EMsetMaxTime(double timeout) { dag2BN_.setMaxTime(timeout); }
1413
/// returns EM's timeout (same unit as maxTime(), i.e. seconds)
1419 double EMMaxTime() const { return dag2BN_.maxTime(); }
1420
/// returns EM's elapsed running time
1422 double EMCurrentTime() const { return dag2BN_.currentTime(); }
1423
/// disables EM's timeout stopping criterion
1425 void EMdisableMaxTime() { dag2BN_.disableMaxTime(); }
1426
/// enables EM's timeout stopping criterion
1427 void EMenableMaxTime() { dag2BN_.enableMaxTime(); };
1428
/// indicates whether EM's timeout criterion is enabled
1430 bool EMisEnabledMaxTime() const { return dag2BN_.isEnabledMaxTime(); }
1431
/// sets EM's period size
1436 void EMsetPeriodSize(Size p) { dag2BN_.setPeriodSize(p); }
1437
/// returns EM's period size
1438 Size EMPeriodSize() const { return dag2BN_.periodSize(); }
1439
/// sets or unsets EM's verbosity
1441 void EMsetVerbosity(bool v) { dag2BN_.setVerbosity(v); }
1442
/// returns EM's verbosity status
1444 bool EMVerbosity() const { return dag2BN_.verbosity(); }
1445
// NOTE(review): the signature of this method (original lines 1446-1447,
// presumably `ApproximationSchemeSTATE EMStateApproximationScheme() const {`)
// is elided in this listing — confirm against the full header.
1448 return dag2BN_.stateApproximationScheme();
1449 }
1450
/// returns the number of iterations performed by the last EM execution
1452 Size EMnbrIterations() const { return dag2BN_.nbrIterations(); }
1453
/// returns the history of values recorded by the last EM execution
1458 const std::vector< double >& EMHistory() const { return dag2BN_.history(); }
1459
1461 };
1462
1463 /* namespace learning */
1464} // namespace gum::learning
1465
1467#ifndef GUM_NO_INLINE
1469#endif /* GUM_NO_INLINE */
1470
1472
1473#endif /* GUM_LEARNING_GENERIC_BN_LEARNER_H */
A class that, given a structure and a parameter estimator returns a full Bayes net.
The class for initializing DatabaseTable and RawDatabaseTable instances from CSV files.
A DBRowGenerator class that returns the rows that are complete (fully observed) w....
A DBRowGenerator class that returns incomplete rows as EM would do.
A Dirichlet prior: computes its N'_ijk from a database.
A pack of learning algorithms that can easily be used.
The K2 algorithm.
The SimpleMiic algorithm.
The base class for all directed edges.
Class representing a Bayesian network.
Definition BayesNet.h:93
Base class for dag.
Definition DAG.h:121
The base class for all undirected edges.
Exception : fatal (unknown ?) error.
Signaler1< const std::string & > onStop
Criteria messageApproximationScheme.
ApproximationSchemeSTATE
The different state of an approximation scheme.
Signaler3< Size, double, double > onProgress
Progression, error and time.
Base class for mixed graphs.
Definition mixedGraph.h:146
Exception : operation not allowed.
Base class for partially directed acyclic graphs.
Definition PDAG.h:130
ThreadNumberManager(Size nb_threads=0)
default constructor
Base class for undirected graphs.
Definition undiGraph.h:128
A class that redirects gum_signal from algorithms to the listeners of BNLearn.
The class computing n times the corrected mutual information, as used in the MIIC algorithm.
KModeTypes
the description type for the complexity correction
A class that, given a structure and a parameter estimator returns a full Bayes net.
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
The class representing a tabular database as used by learning tasks.
The greedy hill climbing learning algorithm (for directed graphs).
a helper to easily read databases
Definition IBNLearner.h:123
const std::vector< std::string > & missingSymbols() const
returns the set of missing symbols taken into account
Database(const std::string &file, const std::vector< std::string > &missing_symbols, const bool induceTypes=false)
default constructor
const DatabaseTable & databaseTable() const
returns the internal database table
Size _min_nb_rows_per_thread_
the minimal number of rows to parse (on average) by thread
Definition IBNLearner.h:274
std::size_t size() const
returns the number of records in the database
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
std::vector< std::size_t > _domain_sizes_
the domain sizes of the variables (useful to speed-up computations)
Definition IBNLearner.h:265
DatabaseTable _database_
the database itself
Definition IBNLearner.h:259
const std::string & nameFromId(NodeId id) const
returns the variable name corresponding to a given node id
double weight(const std::size_t i) const
returns the weight of the ith record
Bijection< NodeId, std::size_t > _nodeId2cols_
a bijection assigning to each variable name its NodeId
Definition IBNLearner.h:268
const std::vector< std::string > & names() const
returns the names of the variables in the database
void setWeight(const std::size_t i, const double weight)
sets the weight of the ith record
const Bijection< NodeId, std::size_t > & nodeId2Columns() const
returns the mapping between node ids and their columns in the database
Database & operator=(const Database &from)
copy operator
DBRowGeneratorParser & parser()
returns the parser for the database
DBRowGeneratorParser * _parser_
the parser used for reading the database
Definition IBNLearner.h:262
void setDatabaseWeight(const double new_weight)
assign a weight to all the rows of the database so that the sum of their weights is equal to new_weig...
BayesNet< GUM_SCALAR > _BNVars_() const
Database(const std::string &filename, const gum::BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
constructor with a BN providing the variables of interest
Size _max_threads_number_
the max number of threads authorized
Definition IBNLearner.h:271
std::size_t nbRows() const
returns the number of records in the database
const std::vector< std::size_t > & domainSizes() const
returns the domain sizes of the variables
double weight() const
returns the weight of the whole database
A pack of learning algorithms that can easily be used.
Definition IBNLearner.h:98
StructuralConstraintPossibleEdges constraintPossibleEdges_
the constraint on possible Edges
Definition IBNLearner.h:979
Size EMnbrIterations() const
returns the number of iterations performed by the last EM execution
StructuralConstraintNoParentNodes constraintNoParentNodes_
the constraint on no parent nodes
Definition IBNLearner.h:985
void eraseNoChildrenNode(NodeId node)
double recordWeight(const std::size_t i) const
returns the weight of the ith record
BNLearnerPriorType priorType_
the prior selected for the score and parameters
Definition IBNLearner.h:956
double EMMaxTime() const
@brief returns EM's timeout (in seconds, as for maxTime())
void disableEpsilon() override
Disable stopping criterion on epsilon.
double EMMinEpsilonRate() const
Get the value of the minimal log-likelihood evolution rate of EM.
double EMEpsilon() const
Get the value of EM's min diff epsilon.
const std::vector< std::size_t > & domainSizes() const
returns the domain sizes of the variables in the database
void EMenableEpsilon()
Enable the log-likelihood min diff stopping criterion in EM.
void useGreedyHillClimbing()
indicate that we wish to use a greedy hill climbing algorithm
void disableMinEpsilonRate() override
Disable stopping criterion on epsilon rate.
void useScoreBDeu()
indicate that we wish to use a BDeu score
ApproximationSchemeSTATE EMStateApproximationScheme() const
get the current state of EM
void addNoParentNode(NodeId node)
void setSliceOrder(const NodeProperty< NodeId > &slice_order)
sets a partial order on the nodes
bool verbosity() const override
verbosity
double minEpsilonRate() const override
Get the value of the minimal epsilon rate.
bool EMisEnabledMinEpsilonRate() const
bool isUsingEM() const
indicates whether we use EM for parameter learning
IBNLearner(const std::string &filename, const std::vector< std::string > &missingSymbols, bool induceTypes=true)
read the database file for the score / parameter estimation and var names
void setForbiddenArcs(const ArcSet &set)
assign a set of forbidden arcs
bool isEnabledMaxTime() const override
std::string priorDbname_
the filename for the Dirichlet a priori, if any
double priorWeight_
the weight of the prior
Definition IBNLearner.h:964
Size EMPeriodSize() const
returns the period size used by EM
double maxTime() const override
returns the timeout (in seconds)
void disableMaxIter() override
Disable stopping criterion on max iterations.
double noiseEM_
the noise factor (in (0,1)) used by EM for perturbing the CPT during init
Definition IBNLearner.h:950
std::vector< std::pair< std::size_t, std::size_t > > ranges_
the set of rows' ranges within the database in which learning is done
void setDatabaseWeight(const double new_weight)
assign a weight to all the rows of the learning database so that the sum of their weights is equal to...
std::vector< Arc > latentVariables() const
get the list of arcs hiding latent variables
void EMdisableMaxTime()
Disable EM's timeout stopping criterion.
void clearDatabaseRanges()
reset the ranges to the one range corresponding to the whole database
Size maxIter() const override
void EMsetMaxIter(Size max)
add a max iteration stopping criterion
std::string checkScorePriorCompatibility() const
checks whether the current score and prior are compatible
void useBDeuPrior(double weight=1.0)
use the BDeu prior
void setVerbosity(bool v) override
verbosity
void setMandatoryArcs(const ArcSet &set)
assign a set of mandatory arcs
void EMdisableMaxIter()
Disable stopping criterion on max iterations.
ParamEstimatorType
an enumeration to select the type of parameter estimation we shall apply
Definition IBNLearner.h:105
AlgoType
an enumeration to select easily the learning algorithm to use
Definition IBNLearner.h:117
void enableMaxIter() override
Enable stopping criterion on max iterations.
ApproximationSchemeSTATE EMState() const
returns the state of the last EM algorithm executed
bool EMisEnabledMaxIter() const
void EMsetVerbosity(bool v)
sets or unsets EM's verbosity
void setMaxTime(double timeout) override
stopping criterion on timeout If the criterion was disabled it will be enabled
virtual void createPrior_()=0
create the prior used for learning
const std::string & nameFromId(NodeId id) const
returns the variable name corresponding to a given node id
double databaseWeight() const
returns the weight of the whole database
static DatabaseTable readFile_(const std::string &filename, const std::vector< std::string > &missing_symbols)
reads a file and returns a databaseVectInRam
void setMinEpsilonRate(double rate) override
Given that we approximate f(t), stopping criterion on d/dt(|f(t+1)-f(t)|) If the criterion was disabl...
bool EMisEnabledEpsilon() const
return true if EM's stopping criterion is the log-likelihood min diff
K2 algoK2_
the K2 algorithm
Definition IBNLearner.h:995
void EMdisableEpsilon()
Disable the min log-likelihood diff stopping criterion for EM.
void addMandatoryArc(const Arc &arc)
AlgoType selectedAlgo_
the selected learning algorithm
Definition IBNLearner.h:992
const std::vector< std::pair< std::size_t, std::size_t > > & databaseRanges() const
returns the current database rows' ranges used for learning
void setMaxIndegree(Size max_indegree)
sets the max indegree
void addPossibleEdge(const Edge &edge)
double logLikelihood(const std::vector< NodeId > &vars, const std::vector< NodeId > &knowing={})
Return the loglikelihood of vars in the base, conditioned by knowing for the BNLearner.
void setInitialDAG(const DAG &)
sets an initial DAG structure
void useK2(const Sequence< NodeId > &order)
indicate that we wish to use K2
Database scoreDatabase_
the database to be used by the scores and parameter estimators
const std::vector< double > & history() const override
INLINE void distributeStop(const ApproximationScheme *approximationScheme, const std::string &message)
distribute signals
bool isEnabledEpsilon() const override
ScoreType
an enumeration enabling to select easily the score we wish to use
Definition IBNLearner.h:101
bool useEM_
a Boolean indicating whether we should use EM for parameter learning or not
Definition IBNLearner.h:947
bool isScoreBased() const
indicate if the selected algorithm is score-based
Definition IBNLearner.h:788
void enableMaxTime() override
stopping criterion on timeout. If the criterion was disabled it will be enabled
DAG2BNLearner dag2BN_
the parametric EM
ApproximationSchemeSTATE stateApproximationScheme() const override
history
Prior * prior_
the prior used
Definition IBNLearner.h:959
void EMenableMinEpsilonRate()
Enable the log-likelihood evolution rate stopping criterion.
bool EMVerbosity() const
returns the EM's verbosity status
void EMsetMaxTime(double timeout)
add a stopping criterion on timeout
void useDatabaseRanges(const std::vector< std::pair< std::size_t, std::size_t > > &new_ranges)
use a new set of database rows' ranges to perform learning
virtual ~IBNLearner()
destructor
std::pair< double, double > chi2(NodeId id1, NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for chi2 test in the database.
INLINE void distributeProgress(const ApproximationScheme *approximationScheme, Size pourcent, double error, double time)
distribute signals
void EMdisableMinEpsilonRate()
Disable the log-likelihood evolution rate stopping criterion.
CorrectedMutualInformation * mutualInfo_
the selected correction for miic
Definition IBNLearner.h:953
double EMCurrentTime() const
get the current running time in second (double)
BNLearnerPriorType
an enumeration to select the prior
Definition IBNLearner.h:108
void erasePossibleEdge(const Edge &edge)
bool isConstraintBased() const
indicate if the selected algorithm is constraint-based
Definition IBNLearner.h:777
void setMaxIter(Size max) override
stopping criterion on the number of iterations. If the criterion was disabled it will be enabled
void setNumberOfThreads(Size nb) override
sets the number max of threads that can be used
void useScoreBIC()
indicate that we wish to use a BIC score
StructuralConstraintNoChildrenNodes constraintNoChildrenNodes_
the constraint on no children nodes
Definition IBNLearner.h:988
DAG initialDAG()
returns the initial DAG structure
void setPossibleEdges(const EdgeSet &set)
assign a set of possible edges
void enableMinEpsilonRate() override
Enable stopping criterion on epsilon rate.
void useNoPrior()
use no prior
ParamEstimatorType paramEstimatorType_
the type of the parameter estimator
Definition IBNLearner.h:944
ScoreType scoreType_
the score selected for learning
Definition IBNLearner.h:938
std::pair< double, double > G2(NodeId id1, NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for the G2 test in the database.
const ApproximationScheme * currentAlgorithm_
bool isEnabledMaxIter() const override
Size periodSize() const override
how many samples between two tests of the stopping criteria
void eraseForbiddenArc(const Arc &arc)
void EMenableMaxIter()
Enable stopping criterion on max iterations.
void useSmoothingPrior(double weight=1)
use the prior smoothing
void disableMaxTime() override
Disable stopping criterion on timeout.
double currentTime() const override
get the current running time in second (double)
DAG learnDag_()
returns the DAG learnt
Database * priorDatabase_
the database used by the Dirichlet prior
double mutualInformation(NodeId id1, NodeId id2, const std::vector< NodeId > &knowing={})
Return the mutual information of id1 and id2 in the base, conditioned by knowing for the BNLearner.
void EMsetMinEpsilonRate(double rate)
sets the stopping criterion of EM as being the minimal log-likelihood's evolution rate
void createScore_()
create the score used for learning
PriorType getPriorType_() const
returns the type (as a string) of a given prior
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
void setPeriodSize(Size p) override
how many samples between two tests of the stopping criteria
void useLocalSearchWithTabuList(Size tabu_size=100, Size nb_decrease=2)
indicate that we wish to use a local search with tabu list
void useScoreK2()
indicate that we wish to use a K2 score
StructuralConstraintIndegree constraintIndegree_
the constraint for indegrees
Definition IBNLearner.h:970
void setEpsilon(double eps) override
Given that we approximate f(t), stopping criterion on |f(t+1)-f(t)| If the criterion was disabled it ...
PDAG learnPDAG()
learn a partial structure from a file (must have read the db before and must have selected miic)
void _setPriorWeight_(double weight)
sets the prior weight
std::string filename_
the filename database
void setPossibleSkeleton(const UndiGraph &skeleton)
assign a set of possible edges
void useEMWithRateCriterion(const double epsilon, const double noise=default_EM_noise)
use The EM algorithm to learn parameters with the rate stopping criterion
void useNMLCorrection()
indicate that we wish to use the NML correction for MIIC
void useEM(const double epsilon, const double noise=default_EM_noise)
use The EM algorithm to learn parameters
void useEMWithDiffCriterion(const double epsilon, const double noise=default_EM_noise)
use The EM algorithm to learn parameters with the diff stopping criterion
bool hasMissingValues() const
returns true if the learner's database has missing values
void enableEpsilon() override
Enable stopping criterion on epsilon.
void forbidEM()
prevent using the EM algorithm for parameter learning
double epsilon() const override
Get the value of epsilon.
SimpleMiic algoSimpleMiic_
the MIIC algorithm
Definition IBNLearner.h:998
Score * score_
the score used
Definition IBNLearner.h:941
StructuralConstraintMandatoryArcs constraintMandatoryArcs_
the constraint on mandatory arcs
Definition IBNLearner.h:982
Miic algoMiic_
the Constraint MIIC algorithm
void createCorrectedMutualInformation_()
create the Corrected Mutual Information instance for Miic
EMApproximationScheme & EM()
returns the EM parameter learning approximation scheme if EM is enabled
void EMsetPeriodSize(Size p)
how many samples between two tests of the stopping criteria
void useNoCorrection()
indicate that we wish to use the NoCorr correction for MIIC
StructuralConstraintForbiddenArcs constraintForbiddenArcs_
the constraint on forbidden arcs
Definition IBNLearner.h:976
void useScoreLog2Likelihood()
indicate that we wish to use a Log2Likelihood score
void setRecordWeight(const std::size_t i, const double weight)
sets the weight of the ith record of the database
GreedyHillClimbing greedyHillClimbing_
the greedy hill climbing algorithm
DAG learnDAG()
learn a structure from a file (must have read the db before)
Size EMMaxIter() const
return the max number of iterations criterion
void useMDLCorrection()
indicate that we wish to use the MDL correction for MIIC
void useDirichletPrior(const std::string &filename, double weight=1)
use the Dirichlet prior from a database
double score(NodeId vars, const std::vector< NodeId > &knowing={})
Return the value of the score currently in use by the BNLearner of a variable given a set of other va...
StructuralConstraintTabuList constraintTabuList_
the constraint for tabu lists
Definition IBNLearner.h:973
Size nbrIterations() const override
void addForbiddenArc(const Arc &arc)
DAG initialDag_
an initial DAG given to learners
void addNoChildrenNode(NodeId node)
IBNLearner & operator=(const IBNLearner &)
copy operator
MixedGraph prepareSimpleMiic_()
prepares the initial graph for Simple Miic
bool isEnabledMinEpsilonRate() const override
Size domainSize(NodeId var) const
returns the domain size of the variable corresponding to the given node id
void useScoreAIC()
indicate that we wish to use an AIC score
const std::vector< std::string > & names() const
returns the names of the variables in the database
void eraseMandatoryArc(const Arc &arc)
static void isCSVFileName_(const std::string &filename)
checks whether the extension of a CSV filename is correct
MixedGraph prepareMiic_()
prepares the initial graph for miic
void useMIIC()
indicate that we wish to use MIIC
LocalSearchWithTabuList localSearchWithTabuList_
the local search with tabu list algorithm
std::pair< std::size_t, std::size_t > useCrossValidationFold(const std::size_t learning_fold, const std::size_t k_fold)
sets the ranges of rows to be used for cross-validation learning
INLINE void setCurrentApproximationScheme(const ApproximationScheme *approximationScheme)
distribute signals
ParamEstimator * createParamEstimator_(const DBRowGeneratorParser &parser, bool take_into_account_score=true)
create the parameter estimator used for learning
StructuralConstraintSliceOrder constraintSliceOrder_
the constraint for 2TBNs
Definition IBNLearner.h:967
const DatabaseTable & database() const
returns the database used by the BNLearner
static constexpr double default_EM_noise
the default noise amount added to CPTs during EM's initialization (see method useEM())
Definition IBNLearner.h:120
void eraseNoParentNode(NodeId node)
const std::vector< double > & EMHistory() const
returns the history of the last EM execution
std::vector< double > rawPseudoCount(const std::vector< NodeId > &vars)
Return the pseudo-counts of NodeIds vars in the base in a raw array.
bool EMisEnabledMaxTime() const
bool inducedTypes_
the policy for typing variables
Definition IBNLearner.h:935
CorrectedMutualInformation::KModeTypes kmodeMiic_
the penalty used in MIIC
void EMsetEpsilon(double eps)
sets the stopping criterion of EM as being the minimal difference between two consecutive log-likelih...
double correctedMutualInformation(NodeId id1, NodeId id2, const std::vector< NodeId > &knowing={})
Return the mutual information of id1 and id2 in the base, conditioned by knowing for the BNLearner.
void useScoreBD()
indicate that we wish to use a BD score
void EMenableMaxTime()
enables EM's stopping criterion on timeout
std::string EMStateMessage() const
returns the state of the EM algorithm
The K2 algorithm.
Definition K2.h:63
The local search with tabu list learning algorithm (for directed graphs).
The Miic learning algorithm.
Definition Miic.h:126
the NoPrior class: corresponds to a zero-weight sample
Definition noPrior.h:65
The base class for estimating parameters of CPTs.
the base class for all priors
Definition prior.h:83
The base class for all the scores used for learning (BIC, BDeu, etc).
Definition score.h:68
The miic learning algorithm.
Definition SimpleMiic.h:83
the structural constraint for forbidding the creation of some arcs during structure learning
the class for structural constraints limiting the number of parents of nodes in a directed graph
the structural constraint indicating that some arcs shall never be removed or reversed
the structural constraint for forbidding children for some nodes
the structural constraint for forbidding parents for some nodes
the structural constraint for forbidding the creation of some arcs except those defined in the class ...
the structural constraint imposing a partial order over nodes
The class imposing a N-sized tabu list as a structural constraints for learning algorithms.
Class building the essential Graph from a DAGmodel.
#define GUM_ERROR(type, msg)
Definition exceptions.h:72
The basic class for computing the set of digraph changes allowed by the user to be executed by the le...
The basic class for computing the set of digraph changes allowed by the user to be executed by the le...
The mechanism to compute the next available graph changes for directed structure learning search algor...
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition types.h:74
Set< Edge > EdgeSet
Some typedefs and defines for shortcuts ...
Size NodeId
Type for node ids.
Set< Arc > ArcSet
Some typedefs and defines for shortcuts ...
HashTable< NodeId, VAL > NodeProperty
Property on graph elements.
The local search learning with tabu list algorithm (for directed graphs).
include the inlined functions if necessary
Definition CSVParser.h:54
unsigned int getNumberOfThreads()
returns the max number of threads used by default when entering the next parallel region
the class for estimating parameters of CPTs using Maximum Likelihood
the class for computing AIC scores
the class for computing Bayesian Dirichlet (BD) log2 scores
the class for computing BDeu scores
the class for computing K2 scores (actually their log2 value)
#define GUM_EMIT1(signal, arg1)
Definition signaler1.h:61
#define GUM_EMIT3(signal, arg1, arg2, arg3)
Definition signaler3.h:61
the base class for structural constraints imposed by DAGs
the structural constraint for forbidding the creation of some arcs during structure learning
the class for structural constraints limiting the number of parents of nodes in a directed graph
the structural constraint indicating that some arcs shall never be removed or reversed
the structural constraint for forbidding children for some nodes during structure learning
the structural constraint for forbidding parents for some nodes during structure learning
the structural constraint for forbidding the creation of some arcs during structure learning
the structural constraint imposing a partial order over nodes
the class imposing a N-sized tabu list as a structural constraints for learning algorithms