aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
BNDatabaseGenerator.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
86
87#ifndef GUM_BN_DATABASE_GENERATOR
88#define GUM_BN_DATABASE_GENERATOR
89
90#include <fstream>
91
94#include <agrum/BN/BayesNet.h>
95
96namespace gum {
97
98 namespace learning {
/// Generator of a sample database drawn from a Bayesian network.
/// GUM_SCALAR is the scalar type parameter of the BayesNet being sampled.
/// NOTE(review): this is a numbered source listing; the `class` header line
/// (listing line 100) and several documented declarations are not present in
/// this extract, so only the visible members are described below.
99 template < typename GUM_SCALAR >
101 public:
103
104 // #######################################################################
106 // #######################################################################
108
/// Builds a generator for the Bayesian network `bn`.
/// NOTE(review): the class keeps a `const BayesNet&` member (`_bn_`), so `bn`
/// presumably has to outlive the generator - confirm against the implementation.
110 explicit BNDatabaseGenerator(const BayesNet< GUM_SCALAR >& bn);
111
114
116
117 // #######################################################################
119 // #######################################################################
121
122
/// Generates and stores a database of `nbSamples` samples.
/// @return the log2-likelihood of the generated samples.
125 double drawSamples(Size nbSamples);
126
/// Overload of drawSamples taking an Instantiation `evs` and a `timeout`
/// (default 300).
/// NOTE(review): presumably `evs` is evidence constraining the samples and
/// `timeout` bounds the sampling time; the unit is not visible here - TODO confirm.
143 double drawSamples(Size nbSamples, const gum::Instantiation& evs, int timeout = 300);
144
153
157
160
/// Generates a CSV file representing the generated database.
/// @param csvFileURL path of the CSV file.
/// @param useLabels presumably writes labels rather than indexes when true - confirm.
/// @param append presumably appends to an existing file instead of overwriting - confirm.
/// @param csvSeparator separator string, "," by default (passed by value).
/// @param checkOnAppend semantics not visible in this listing - TODO confirm.
162 void toCSV(const std::string& csvFileURL,
163 bool useLabels = true,
164 bool append = false,
165 std::string csvSeparator = ",",
166 bool checkOnAppend = false) const;
167
/// Returns the generated samples as a DatabaseTable.
169 DatabaseTable toDatabaseTable(bool useLabels = true) const;
170
/// Returns the generated database as a vector of rows of Idx values.
/// The result is returned by value, i.e. as a copy.
173 std::vector< std::vector< Idx > > database() const;
174
/// Size accessors on the generated samples.
/// NOTE(review): presumably the number of rows / columns of the stored
/// database - confirm against the implementation.
175 Size samplesNbRows() const;
176 Size samplesNbCols() const;
177
/// Cell accessors on the generated samples, addressed by (row, col).
/// NOTE(review): presumably the stored index and its label, respectively;
/// bounds-checking behaviour is not visible here - confirm.
178 Idx samplesAt(Idx row, Idx col) const;
179 std::string samplesLabelAt(Idx row, Idx col) const;
180
/// Changes the columns order (given as indexes).
182 void setVarOrder(const std::vector< Idx >& varOrder);
183
/// Changes the columns order (given as strings, presumably variable names).
185 void setVarOrder(const std::vector< std::string >& varOrder);
186
/// Changes the columns order according to a CSV file.
188 void setVarOrderFromCSV(const std::string& csvFileURL, const std::string& csvSeparator = ",");
189
192
195
/// Sets the columns in random order.
197 void setRandomVarOrder();
198
/// Returns the variable order as indexes (by value).
200 std::vector< Idx > varOrder() const;
201
/// Returns the variable order as names (by value).
203 std::vector< std::string > varOrderNames() const;
204
/// Returns the log2-likelihood of the generated samples.
206 double log2likelihood() const;
207
/// Returns a const reference to the Bayesian network.
/// NOTE(review): this accessor is not const-qualified, so it cannot be called
/// on a const generator; the `;` after the body is redundant.
209 const BayesNet< GUM_SCALAR >& bn(void) { return _bn_; };
210
212
213 private:
/// Bayesian network (held by const reference, not owned).
216 const BayesNet< GUM_SCALAR >& _bn_;
217
220
223
/// Generated database: one inner vector per row.
225 std::vector< std::vector< Idx > > _database_;
226
/// Variable order in the generated database.
228 std::vector< Idx > _varOrder_;
229
/// Whether drawSamples has already been called.
231 bool _drawnSamples_ = false;
232
235
/// Returns the final string for a label, taking into account the behaviour
/// chosen for discretized variables.
/// NOTE(review): the exact roles of `row` and `i` are not visible here - confirm.
238 std::string _label_(const std::vector< Idx >& row, const DiscreteVariable& v, Idx i) const;
239
/// Returns the variable order read from the CSV file at `csvFileURL`.
241 std::vector< Idx > _varOrderFromCSV_(const std::string& csvFileURL,
242 const std::string& csvSeparator = ",") const;
243
/// Same as above, reading from the stream `csvFile`
/// (presumably already opened by the caller - confirm).
245 std::vector< Idx > _varOrderFromCSV_(std::ifstream& csvFile,
246 const std::string& csvSeparator = ",") const;
247
248 // forbidden copies / moves
253 };
254
255
// Explicit instantiation declaration: unless GUM_NO_EXTERN_TEMPLATE_CLASS is
// defined, including this header does not implicitly instantiate
// BNDatabaseGenerator< double >; the matching explicit instantiation is
// expected in another translation unit (location not visible here - confirm).
256#ifndef GUM_NO_EXTERN_TEMPLATE_CLASS
257 extern template class BNDatabaseGenerator< double >;
258#endif
259
260 } /* namespace learning */
261} /* namespace gum */
262
264#endif /* GUM_BN_DATABASE_GENERATOR */
Class representing Bayesian networks.
Base class for discrete random variable.
Class for assigning/browsing values to tuples of discrete variables.
Notification for progress using listener.
bool _drawnSamples_
whether drawSamples has been already called.
std::vector< Idx > varOrder() const
returns variable order indexes
std::string samplesLabelAt(Idx row, Idx col) const
returns the label of the generated sample at the given row and column
DatabaseTable toDatabaseTable(bool useLabels=true) const
generates a DatabaseTable
void setVarOrderFromCSV(const std::string &csvFileURL, const std::string &csvSeparator=",")
change columns order according to a csv file
Size samplesNbRows() const
returns the number of rows of the generated samples
BNDatabaseGenerator(BNDatabaseGenerator &&)=delete
std::string _label_(const std::vector< Idx > &row, const DiscreteVariable &v, Idx i) const
return the final string for a label (taking into account the behavior for DiscretizedVariable) from a...
BNDatabaseGenerator & operator=(const BNDatabaseGenerator &)=delete
std::vector< std::vector< Idx > > database() const
generates database according to bn into a std::vector
void setDiscretizedLabelModeRandom()
set the behaviour of sampling for discretized variable to uniformly draw double value
BNDatabaseGenerator & operator=(BNDatabaseGenerator &&)=delete
Idx samplesAt(Idx row, Idx col) const
returns the value (as an index) of the generated sample at the given row and column
double _log2likelihood_
log2Likelihood of generated samples
const BayesNet< GUM_SCALAR > & bn(void)
return const ref to the Bayes Net
BNDatabaseGenerator(const BayesNet< GUM_SCALAR > &bn)
constructor from a Bayesian network
void setDiscretizedLabelModeInterval()
set the behaviour of sampling for discretized variable to select the label : "[min,...
Bijection< std::string, NodeId > _names2ids_
bijection nodes names
std::vector< std::vector< Idx > > _database_
generated database
BNDatabaseGenerator(const BNDatabaseGenerator &)=delete
Size samplesNbCols() const
returns the number of columns of the generated samples
const BayesNet< GUM_SCALAR > & _bn_
Bayesian network.
void setAntiTopologicalVarOrder()
set columns in anti-topological order
double log2likelihood() const
returns log2Likelihood of generated samples
void setTopologicalVarOrder()
set columns in topological order
void setDiscretizedLabelModeMedian()
set the behaviour of sampling for discretized variable to deterministic select double median of inter...
double drawSamples(Size nbSamples)
generates and stores the database, returns its log2-likelihood; progress is reported through ProgressNotifier
std::vector< Idx > _varOrder_
variable order in generated database
std::vector< std::string > varOrderNames() const
returns variable order.
std::vector< Idx > _varOrderFromCSV_(const std::string &csvFileURL, const std::string &csvSeparator=",") const
returns varOrder from a csv file
void setVarOrder(const std::vector< Idx > &varOrder)
change columns order
void toCSV(const std::string &csvFileURL, bool useLabels=true, bool append=false, std::string csvSeparator=",", bool checkOnAppend=false) const
generates csv representing the generated database
void setRandomVarOrder()
set columns in random order
The class representing a tabular database as used by learning tasks.
The class representing a tabular database stored in RAM.
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition types.h:74
Size Idx
Type for indexes.
Definition types.h:79
include the inlined functions if necessary
Definition CSVParser.h:54
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
priority queues (in which an element cannot appear more than once)