aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
gum::learning::BNDatabaseGenerator< GUM_SCALAR > Class Template Reference

#include <BNDatabaseGenerator.h>

Inheritance diagram for gum::learning::BNDatabaseGenerator< GUM_SCALAR >:
Collaboration diagram for gum::learning::BNDatabaseGenerator< GUM_SCALAR >:

Public Types

enum class  DiscretizedLabelMode : char { INTERVAL , MEDIAN , RANDOM }

Public Member Functions

Constructors / Destructors
 BNDatabaseGenerator (const BayesNet< GUM_SCALAR > &bn)
 default constructor
 ~BNDatabaseGenerator ()
 destructor
Accessors / Modifiers
double drawSamples (Size nbSamples)
 generate and store the database (progress is notified through ProgressNotifier); returns the log2likelihood
double drawSamples (Size nbSamples, const gum::Instantiation &evs, int timeout=300)
 Generate and store the part of the database compatible with the evidence (progress is notified through ProgressNotifier); returns the log2likelihood.
void setDiscretizedLabelModeRandom ()
 set the behaviour of sampling for discretized variable to uniformly draw double value
void setDiscretizedLabelModeMedian ()
 set the behaviour of sampling for discretized variables to deterministically select the median (as a double) of the intervals
void setDiscretizedLabelModeInterval ()
 set the behaviour of sampling for discretized variable to select the label : "[min,max["
void toCSV (const std::string &csvFileURL, bool useLabels=true, bool append=false, std::string csvSeparator=",", bool checkOnAppend=false) const
 generates csv representing the generated database
DatabaseTable toDatabaseTable (bool useLabels=true) const
 generates a DatabaseTable
std::vector< std::vector< Idx > > database () const
 generates database according to bn into a std::vector
Size samplesNbRows () const
 returns the number of rows (samples) of the generated database
Size samplesNbCols () const
 returns the number of columns (variables) of the generated database
Idx samplesAt (Idx row, Idx col) const
 returns the value stored in row `row` for the variable at position `col` of the current variable order
std::string samplesLabelAt (Idx row, Idx col) const
 returns the label (as a string) stored in row `row` for the variable at position `col` of the current variable order
void setVarOrder (const std::vector< Idx > &varOrder)
 change columns order
void setVarOrder (const std::vector< std::string > &varOrder)
 change columns order using variable names
void setVarOrderFromCSV (const std::string &csvFileURL, const std::string &csvSeparator=",")
 change columns order according to a csv file
void setTopologicalVarOrder ()
 set columns in topological order
void setAntiTopologicalVarOrder ()
 set columns in anti-topological order
void setRandomVarOrder ()
 set columns in random order
std::vector< Idx > varOrder () const
 returns variable order indexes
std::vector< std::string > varOrderNames () const
 returns variable order.
double log2likelihood () const
 returns log2Likelihood of generated samples
const BayesNet< GUM_SCALAR > & bn (void)
 return const ref to the Bayes Net

Public Attributes

Signaler2< Size, double > onProgress
 Progression (percent) and time.
Signaler1< const std::string & > onStop
 with a possible explanation for stopping

Private Member Functions

std::string _label_ (const std::vector< Idx > &row, const DiscreteVariable &v, Idx i) const
 return the final string for a label (taking into account the behavior for DiscretizedVariable) from a row
std::vector< Idx > _varOrderFromCSV_ (const std::string &csvFileURL, const std::string &csvSeparator=",") const
 returns varOrder from a csv file
std::vector< Idx > _varOrderFromCSV_ (std::ifstream &csvFile, const std::string &csvSeparator=",") const
 returns varOrder from a csv file
 BNDatabaseGenerator (const BNDatabaseGenerator &)=delete
 BNDatabaseGenerator (BNDatabaseGenerator &&)=delete
BNDatabaseGenerator & operator= (const BNDatabaseGenerator &)=delete
BNDatabaseGenerator & operator= (BNDatabaseGenerator &&)=delete

Private Attributes

DiscretizedLabelMode _discretizedLabelMode_
const BayesNet< GUM_SCALAR > & _bn_
 Bayesian network.
Bijection< std::string, NodeId > _names2ids_
 bijection between node names and node ids
Size _nbVars_
 number of variables
std::vector< std::vector< Idx > > _database_
 generated database
std::vector< Idx > _varOrder_
 variable order in generated database
bool _drawnSamples_ = false
 whether drawSamples has been already called.
double _log2likelihood_ = 0
 log2Likelihood of generated samples

Detailed Description

template<typename GUM_SCALAR>
class gum::learning::BNDatabaseGenerator< GUM_SCALAR >

Definition at line 100 of file BNDatabaseGenerator.h.

Member Enumeration Documentation

◆ DiscretizedLabelMode

template<typename GUM_SCALAR>
enum class gum::learning::BNDatabaseGenerator::DiscretizedLabelMode : char
strong
Enumerator
INTERVAL 
MEDIAN 
RANDOM 

Definition at line 102 of file BNDatabaseGenerator.h.

102: char { INTERVAL, MEDIAN, RANDOM };

Constructor & Destructor Documentation

◆ BNDatabaseGenerator() [1/3]

template<typename GUM_SCALAR>
gum::learning::BNDatabaseGenerator< GUM_SCALAR >::BNDatabaseGenerator ( const BayesNet< GUM_SCALAR > & bn)
explicit

default constructor

Definition at line 57 of file BNDatabaseGenerator_tpl.h.

57 :
58 _bn_(bn) {
60
61 // get the node names => they will serve as ids
62 NodeId id = 0;
63 for (const auto& var: _bn_.dag()) {
64 auto name = _bn_.variable(var).name();
65 _names2ids_.insert(name, var);
66 ++id;
67 }
68 _nbVars_ = id;
69 _varOrder_.resize(_nbVars_);
71 std::iota(_varOrder_.begin(), _varOrder_.end(), (Idx)0);
72 }
const BayesNet< GUM_SCALAR > & bn(void)
return const ref to the Bayes Net
BNDatabaseGenerator(const BayesNet< GUM_SCALAR > &bn)
default constructor
Bijection< std::string, NodeId > _names2ids_
bijection nodes names
const BayesNet< GUM_SCALAR > & _bn_
Bayesian network.
std::vector< Idx > _varOrder_
variable order in generated database

References BNDatabaseGenerator(), _bn_, _discretizedLabelMode_, _names2ids_, _nbVars_, _varOrder_, bn(), and RANDOM.

Referenced by BNDatabaseGenerator(), BNDatabaseGenerator(), BNDatabaseGenerator(), ~BNDatabaseGenerator(), operator=(), and operator=().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ~BNDatabaseGenerator()

template<typename GUM_SCALAR>
gum::learning::BNDatabaseGenerator< GUM_SCALAR >::~BNDatabaseGenerator ( )

destructor

Definition at line 76 of file BNDatabaseGenerator_tpl.h.

References BNDatabaseGenerator().

Here is the call graph for this function:

◆ BNDatabaseGenerator() [2/3]

template<typename GUM_SCALAR>
gum::learning::BNDatabaseGenerator< GUM_SCALAR >::BNDatabaseGenerator ( const BNDatabaseGenerator< GUM_SCALAR > & )
private delete

References BNDatabaseGenerator().

Here is the call graph for this function:

◆ BNDatabaseGenerator() [3/3]

template<typename GUM_SCALAR>
gum::learning::BNDatabaseGenerator< GUM_SCALAR >::BNDatabaseGenerator ( BNDatabaseGenerator< GUM_SCALAR > && )
private delete

References BNDatabaseGenerator().

Here is the call graph for this function:

Member Function Documentation

◆ _label_()

template<typename GUM_SCALAR>
std::string gum::learning::BNDatabaseGenerator< GUM_SCALAR >::_label_ ( const std::vector< Idx > & row,
const DiscreteVariable & v,
Idx i ) const
private

return the final string for a label (taking into account the behavior for DiscretizedVariable) from a row

Definition at line 293 of file BNDatabaseGenerator_tpl.h.

295 {
296 if (v.varType() == VarType::DISCRETIZED) {
297 switch (_discretizedLabelMode_) {
298 case DiscretizedLabelMode::MEDIAN : return std::to_string(v.numerical(row.at(i)));
300 return std::to_string(static_cast< const IDiscretizedVariable& >(v).draw(row.at(i)));
301 case DiscretizedLabelMode::INTERVAL : return v.label(row.at(i));
302 }
303 }
304
305 return v.label(row.at(i));
306 }

References _discretizedLabelMode_, gum::DISCRETIZED, INTERVAL, gum::DiscreteVariable::label(), MEDIAN, gum::DiscreteVariable::numerical(), RANDOM, and gum::DiscreteVariable::varType().

Referenced by samplesLabelAt(), and toDatabaseTable().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ _varOrderFromCSV_() [1/2]

template<typename GUM_SCALAR>
std::vector< Idx > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::_varOrderFromCSV_ ( const std::string & csvFileURL,
const std::string & csvSeparator = "," ) const
private

returns varOrder from a csv file

Definition at line 471 of file BNDatabaseGenerator_tpl.h.

472 {
475 if (csvFile) {
477 csvFile.close();
478 } else {
479 GUM_ERROR(NotFound, "csvFileURL does not exist")
480 }
481
482 return varOrder;
483 }
std::vector< Idx > varOrder() const
returns variable order indexes
std::vector< Idx > _varOrderFromCSV_(const std::string &csvFileURL, const std::string &csvSeparator=",") const
returns varOrder from a csv file
#define GUM_ERROR(type, msg)
Definition exceptions.h:72

References _varOrderFromCSV_(), GUM_ERROR, and varOrder().

Referenced by _varOrderFromCSV_(), setVarOrderFromCSV(), and toCSV().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ _varOrderFromCSV_() [2/2]

template<typename GUM_SCALAR>
std::vector< Idx > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::_varOrderFromCSV_ ( std::ifstream & csvFile,
const std::string & csvSeparator = "," ) const
private

returns varOrder from a csv file

Definition at line 488 of file BNDatabaseGenerator_tpl.h.

489 {
492 header_found.reserve(_nbVars_);
493 while (std::getline(csvFile, line)) {
494 std::size_t i = 0;
495 auto pos = line.find(csvSeparator);
496 while (pos != std::string::npos) {
497 header_found.push_back(line.substr(i, pos - i));
498 pos += csvSeparator.length();
499 i = pos;
500 pos = line.find(csvSeparator, pos);
501
502 if (pos == std::string::npos) header_found.push_back(line.substr(i, line.length()));
503 }
504 break;
505 }
506
508 varOrder.reserve(_nbVars_);
509
510 for (const auto& hf: header_found) {
511 varOrder.push_back(_names2ids_.second(hf));
512 }
513
514 return varOrder;
515 }

References _names2ids_, _nbVars_, and varOrder().

Here is the call graph for this function:

◆ bn()

template<typename GUM_SCALAR>
const BayesNet< GUM_SCALAR > & gum::learning::BNDatabaseGenerator< GUM_SCALAR >::bn ( void )
inline

return const ref to the Bayes Net

Definition at line 209 of file BNDatabaseGenerator.h.

209{ return _bn_; };

References _bn_.

Referenced by BNDatabaseGenerator().

Here is the caller graph for this function:

◆ database()

template<typename GUM_SCALAR>
std::vector< std::vector< Idx > > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::database ( ) const

generates database according to bn into a std::vector

returns database using specified data order

Warning
: makes a copy of the whole database

Definition at line 358 of file BNDatabaseGenerator_tpl.h.

358 {
359 if (!_drawnSamples_) GUM_ERROR(OperationNotAllowed, "drawSamples() must be called first.")
360
362 for (Idx i = 0; i < _database_.size(); ++i) {
363 for (Idx j = 0; j < _nbVars_; ++j) {
364 db.at(i).at(j) = (Idx)_database_.at(i).at(_varOrder_.at(j));
365 }
366 }
367 return db;
368 }
bool _drawnSamples_
whether drawSamples has been already called.
std::vector< std::vector< Idx > > _database_
generated database

References _database_, _drawnSamples_, _nbVars_, _varOrder_, and GUM_ERROR.

◆ drawSamples() [1/2]

template<typename GUM_SCALAR>
double gum::learning::BNDatabaseGenerator< GUM_SCALAR >::drawSamples ( Size nbSamples)

generate and store the database (progress is notified through ProgressNotifier); returns the log2likelihood

draw instances from bn

Definition at line 82 of file BNDatabaseGenerator_tpl.h.

82 {
83 const Instantiation inst;
84 return drawSamples(nbSamples, inst);
85 }
double drawSamples(Size nbSamples)
generate and stock database, returns log2likelihood using ProgressNotifier as notification

References drawSamples().

Referenced by drawSamples().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ drawSamples() [2/2]

template<typename GUM_SCALAR>
double gum::learning::BNDatabaseGenerator< GUM_SCALAR >::drawSamples ( Size nbSamples,
const gum::Instantiation & evs,
int timeout = 300 )

Generate and store the part of the database compatible with the evidence (progress is notified through ProgressNotifier); returns the log2likelihood.

draw instances from bn

Warning
nbSamples is not the number of generated samples but the size of the filtered database. It may happen that the evidence is very rare (or even impossible). In that case, the rejection sampling process may be very slow (or even infinite). In that case, the timeout is mandatory.

Parameters
nbSamples: the size of the filtered database.
evs: the evidence.
timeout: the maximum time in seconds to wait for the generation of the samples. If the timeout is reached, the function returns the log2likelihood of the generated samples. If timeout=0, no timeout is enforced and the function may run indefinitely.

Definition at line 89 of file BNDatabaseGenerator_tpl.h.

91 {
92 int progress = 0;
93
94 if (onProgress.hasListener()) { GUM_EMIT2(onProgress, progress, 0.0); }
95
96 _database_.clear();
97 _database_.resize(nbSamples);
98 for (auto& row: _database_) {
99 row.resize(_nbVars_);
100 }
101 // get the order in which the nodes will be sampled
102 const auto topOrder = _bn_.topologicalOrder();
104
105 // create instantiations in advance
106 for (NodeId node = 0; node < _nbVars_; ++node)
107 particule.add(_bn_.variable(node));
108
110 timer.reset();
111
112 // perform the sampling
114 Idx idSample = 0;
115 while (idSample < nbSamples) {
116 if (onProgress.hasListener()) {
117 auto p = int((idSample * 100) / nbSamples);
118 if (p != progress) {
119 progress = p;
121 }
122 }
124 bool reject = false;
125 for (Idx rank = 0; rank < _nbVars_; ++rank) {
126 const NodeId node = topOrder[rank];
127 const auto& var = _bn_.variable(node);
128 const auto& cpt = _bn_.cpt(node);
129
130 const double nb = gum::randomProba();
131 double cumul = 0.0;
132 for (particule.setFirstVar(var); !particule.end(); particule.incVar(var)) {
133 cumul += cpt[particule];
134 if (cumul >= nb) break;
135 }
136 if (particule.end()) particule.setLastVar(var);
137
138 if ((!evs.empty()) && evs.contains(var) && (evs.val(var) != particule.val(var))) {
139 reject = true;
140 break;
141 }
142
143 sample.at(node) = particule.val(var);
145 }
146 if (timeout > 0 && timer.step() > timeout) { break; }
147 if (reject) { continue; }
148 idSample++;
149 }
150
151 if (idSample > 0) {
152 if (idSample < nbSamples) _database_.resize(idSample);
153 } else {
154 _database_.clear();
155 }
156 _drawnSamples_ = true;
157
158 if (onProgress.hasListener()) {
160 ss << "Database of size " << idSample << "(" << nbSamples << ") generated in " << timer.step()
161 << " seconds. Log2likelihood : " << _log2likelihood_;
162 GUM_EMIT1(onStop, ss.str());
163 }
164
165 return _log2likelihood_;
166 }
Signaler2< Size, double > onProgress
Progression (percent) and time.
Signaler1< const std::string & > onStop
with a possible explanation for stopping
double _log2likelihood_
log2Likelihood of generated samples
double randomProba()
Returns a random double between 0 and 1 included (i.e.
#define GUM_EMIT1(signal, arg1)
Definition signaler1.h:61
#define GUM_EMIT2(signal, arg1, arg2)
Definition signaler2.h:61

References _bn_, _database_, _drawnSamples_, _log2likelihood_, _nbVars_, gum::Instantiation::add(), gum::Instantiation::contains(), gum::Instantiation::empty(), gum::Instantiation::end(), GUM_EMIT1, GUM_EMIT2, gum::Instantiation::incVar(), gum::ProgressNotifier::onProgress, gum::ProgressNotifier::onStop, gum::randomProba(), gum::Timer::reset(), gum::Instantiation::setFirstVar(), gum::Instantiation::setLastVar(), gum::Timer::step(), and gum::Instantiation::val().

Here is the call graph for this function:

◆ log2likelihood()

template<typename GUM_SCALAR>
double gum::learning::BNDatabaseGenerator< GUM_SCALAR >::log2likelihood ( ) const

returns log2Likelihood of generated samples

Definition at line 463 of file BNDatabaseGenerator_tpl.h.

463 {
464 if (!_drawnSamples_) { GUM_ERROR(OperationNotAllowed, "drawSamples() must be called first.") }
465 return _log2likelihood_;
466 }

References _drawnSamples_, _log2likelihood_, and GUM_ERROR.

◆ operator=() [1/2]

template<typename GUM_SCALAR>
BNDatabaseGenerator & gum::learning::BNDatabaseGenerator< GUM_SCALAR >::operator= ( BNDatabaseGenerator< GUM_SCALAR > && )
private delete

References BNDatabaseGenerator().

Here is the call graph for this function:

◆ operator=() [2/2]

template<typename GUM_SCALAR>
BNDatabaseGenerator & gum::learning::BNDatabaseGenerator< GUM_SCALAR >::operator= ( const BNDatabaseGenerator< GUM_SCALAR > & )
private delete

References BNDatabaseGenerator().

Here is the call graph for this function:

◆ samplesAt()

template<typename GUM_SCALAR>
INLINE Idx gum::learning::BNDatabaseGenerator< GUM_SCALAR >::samplesAt ( Idx row,
Idx col ) const

returns the value stored in row `row` for the variable at position `col` of the current variable order

Raises OperationNotAllowed if drawSamples() has not been called first.

Definition at line 183 of file BNDatabaseGenerator_tpl.h.

183 {
184 if (!_drawnSamples_) { GUM_ERROR(OperationNotAllowed, "drawSamples() must be called first.") }
185 return _database_.at(row).at(_varOrder_.at(col));
186 }

References _database_, _drawnSamples_, _varOrder_, and GUM_ERROR.

◆ samplesLabelAt()

template<typename GUM_SCALAR>
INLINE std::string gum::learning::BNDatabaseGenerator< GUM_SCALAR >::samplesLabelAt ( Idx row,
Idx col ) const

returns the label (as a string) stored in row `row` for the variable at position `col` of the current variable order

Raises OperationNotAllowed if drawSamples() has not been called first.

Definition at line 189 of file BNDatabaseGenerator_tpl.h.

189 {
190 if (!_drawnSamples_) { GUM_ERROR(OperationNotAllowed, "drawSamples() must be called first.") }
191 const auto j = _varOrder_.at(col);
192 return _label_(_database_.at(row), _bn_.variable(j), j);
193 }
std::string _label_(const std::vector< Idx > &row, const DiscreteVariable &v, Idx i) const
return the final string for a label (taking into account the behavior for DiscretizedVariable) from a...

References _bn_, _database_, _drawnSamples_, _label_(), _varOrder_, and GUM_ERROR.

Here is the call graph for this function:

◆ samplesNbCols()

template<typename GUM_SCALAR>
INLINE Size gum::learning::BNDatabaseGenerator< GUM_SCALAR >::samplesNbCols ( ) const

returns the number of columns (variables) of the generated database

Raises OperationNotAllowed if drawSamples() has not been called first.

Definition at line 176 of file BNDatabaseGenerator_tpl.h.

176 {
177 if (!_drawnSamples_) { GUM_ERROR(OperationNotAllowed, "drawSamples() must be called first.") }
178
179 return _nbVars_;
180 }

References _drawnSamples_, _nbVars_, and GUM_ERROR.

◆ samplesNbRows()

template<typename GUM_SCALAR>
INLINE Size gum::learning::BNDatabaseGenerator< GUM_SCALAR >::samplesNbRows ( ) const

returns the number of rows (samples) of the generated database

Raises OperationNotAllowed if drawSamples() has not been called first.

Definition at line 169 of file BNDatabaseGenerator_tpl.h.

169 {
170 if (!_drawnSamples_) { GUM_ERROR(OperationNotAllowed, "drawSamples() must be called first.") }
171
172 return _database_.size();
173 }

References _database_, _drawnSamples_, and GUM_ERROR.

◆ setAntiTopologicalVarOrder()

template<typename GUM_SCALAR>
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setAntiTopologicalVarOrder ( )

set columns in anti-topological order

set columns in antiTopological order

Definition at line 421 of file BNDatabaseGenerator_tpl.h.

421 {
423 varOrder.reserve(_nbVars_);
424 for (const auto& v: _bn_.topologicalOrder()) {
425 varOrder.push_back(v);
426 }
427 std::reverse(varOrder.begin(), varOrder.end());
429 }
void setVarOrder(const std::vector< Idx > &varOrder)
change columns order

References _bn_, _nbVars_, setVarOrder(), and varOrder().

Here is the call graph for this function:

◆ setDiscretizedLabelModeInterval()

template<typename GUM_SCALAR>
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setDiscretizedLabelModeInterval ( )

set the behaviour of sampling for discretized variable to select the label : "[min,max["

Definition at line 206 of file BNDatabaseGenerator_tpl.h.

References _discretizedLabelMode_, and INTERVAL.

◆ setDiscretizedLabelModeMedian()

template<typename GUM_SCALAR>
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setDiscretizedLabelModeMedian ( )

set the behaviour of sampling for discretized variables to deterministically select the median (as a double) of the intervals

Definition at line 201 of file BNDatabaseGenerator_tpl.h.

References _discretizedLabelMode_, and MEDIAN.

◆ setDiscretizedLabelModeRandom()

template<typename GUM_SCALAR>
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setDiscretizedLabelModeRandom ( )

set the behaviour of sampling for discretized variable to uniformly draw double value

Warning
: each call to toCSV or toDatabaseTable that uses labels will then generate different values
: this is the default behaviour

Definition at line 196 of file BNDatabaseGenerator_tpl.h.

References _discretizedLabelMode_, and RANDOM.

◆ setRandomVarOrder()

template<typename GUM_SCALAR>
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setRandomVarOrder ( )

set columns in random order

Definition at line 433 of file BNDatabaseGenerator_tpl.h.

433 {
435 varOrder.reserve(_bn_.size());
436 for (const auto& var: _bn_.dag()) {
437 varOrder.push_back(_bn_.variable(var).name());
438 }
441 }

References _bn_, gum::randomGenerator(), setVarOrder(), and varOrder().

Here is the call graph for this function:

◆ setTopologicalVarOrder()

template<typename GUM_SCALAR>
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setTopologicalVarOrder ( )

set columns in topological order

set columns in Topological order

Definition at line 410 of file BNDatabaseGenerator_tpl.h.

410 {
412 varOrder.reserve(_nbVars_);
413 for (const auto& v: _bn_.topologicalOrder()) {
414 varOrder.push_back(v);
415 }
417 }

References _bn_, _nbVars_, setVarOrder(), and varOrder().

Here is the call graph for this function:

◆ setVarOrder() [1/2]

template<typename GUM_SCALAR>
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrder ( const std::vector< Idx > & varOrder)

change columns order

Definition at line 372 of file BNDatabaseGenerator_tpl.h.

372 {
373 if (varOrder.size() != _nbVars_)
374 GUM_ERROR(FatalError, "varOrder's size must be equal to the number of variables")
375
376 std::vector< bool > usedVars(_nbVars_, false);
377 for (const auto& i: varOrder) {
378 if (i >= _nbVars_) GUM_ERROR(FatalError, "varOrder contains invalid variables")
379 if (usedVars.at(i)) GUM_ERROR(FatalError, "varOrder must not have repeated variables")
380 usedVars.at(i) = true;
381 }
382
383 if (std::find(usedVars.begin(), usedVars.end(), false) != usedVars.end()) {
384 GUM_ERROR(FatalError, "varOrder must contain all variables")
385 }
386
388 }

References _nbVars_, _varOrder_, GUM_ERROR, and varOrder().

Referenced by setAntiTopologicalVarOrder(), setRandomVarOrder(), setTopologicalVarOrder(), setVarOrder(), and setVarOrderFromCSV().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ setVarOrder() [2/2]

template<typename GUM_SCALAR>
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrder ( const std::vector< std::string > & varOrder)

change columns order using variable names

Definition at line 392 of file BNDatabaseGenerator_tpl.h.

392 {
394 varOrderIdx.reserve(varOrder.size());
395 for (const auto& vname: varOrder) {
396 varOrderIdx.push_back(_names2ids_.second(vname));
397 }
399 }

References _names2ids_, setVarOrder(), and varOrder().

Here is the call graph for this function:

◆ setVarOrderFromCSV()

template<typename GUM_SCALAR>
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrderFromCSV ( const std::string & csvFileURL,
const std::string & csvSeparator = "," )

change columns order according to a csv file

Definition at line 403 of file BNDatabaseGenerator_tpl.h.

References _varOrderFromCSV_(), and setVarOrder().

Here is the call graph for this function:

◆ toCSV()

template<typename GUM_SCALAR>
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::toCSV ( const std::string & csvFileURL,
bool useLabels = true,
bool append = false,
std::string csvSeparator = ",",
bool checkOnAppend = false ) const

generates csv representing the generated database

generates database, and writes csv file

Definition at line 212 of file BNDatabaseGenerator_tpl.h.

216 {
217 if (!_drawnSamples_) { GUM_ERROR(OperationNotAllowed, "drawSamples() must be called first.") }
218
219 if (csvSeparator.find('\n') != std::string::npos) {
220 GUM_ERROR(InvalidArgument, "csvSeparator must not contain end-line characters")
221 }
222
223 bool includeHeader = true;
224 if (append) {
226 if (csvFile) {
230 "Inconsistent variable order in csvFile when appending. You "
231 "can use setVarOrderFromCSV(url) function to get the right "
232 "order. You could also set parameter checkOnAppend=false if you "
233 "know what you are doing.")
234 includeHeader = false;
235 }
236 csvFile.close();
237 }
238
239
241
243 bool firstCol = true;
244 if (includeHeader) {
245 for (const auto& i: _varOrder_) {
246 if (firstCol) {
247 firstCol = false;
248 } else {
249 os << csvSeparator;
250 }
251 os << _bn_.variable(i).name();
252 }
253 }
254 os << std::endl;
255
256 bool firstRow = true;
257 for (const auto& row: _database_) {
258 if (firstRow) {
259 firstRow = false;
260 } else {
261 os << std::endl;
262 }
263 firstCol = true;
264 for (const auto& i: _varOrder_) {
265 if (firstCol) {
266 firstCol = false;
267 } else {
268 os << csvSeparator;
269 }
270 if (useLabels) {
271 const auto& v = _bn_.variable(i);
272 if (v.varType() == VarType::DISCRETIZED) {
273 switch (_discretizedLabelMode_) {
274 case DiscretizedLabelMode::MEDIAN : os << v.numerical(row.at(i)); break;
277 break;
278 case DiscretizedLabelMode::INTERVAL : os << v.label(row.at(i)); break;
279 }
280 } else {
281 os << v.label(row.at(i));
282 }
283 } else {
284 os << row[i];
285 }
286 }
287 }
288
289 os.close();
290 }

References _bn_, _database_, _discretizedLabelMode_, _drawnSamples_, _varOrder_, _varOrderFromCSV_(), gum::DISCRETIZED, GUM_ERROR, INTERVAL, MEDIAN, RANDOM, and varOrder().

Here is the call graph for this function:

◆ toDatabaseTable()

template<typename GUM_SCALAR>
DatabaseTable gum::learning::BNDatabaseGenerator< GUM_SCALAR >::toDatabaseTable ( bool useLabels = true) const

generates a DatabaseTable

Definition at line 310 of file BNDatabaseGenerator_tpl.h.

310 {
311 if (!_drawnSamples_) GUM_ERROR(OperationNotAllowed, "proceed() must be called first.")
312
314 std::vector< std::string > varNames;
317 varNames.push_back(_names2ids_.first(i));
318 }
319
320 // create the translators
321 for (std::size_t i = 0; i < _nbVars_; ++i) {
322 const Variable& var = _bn_.variable(_varOrder_[i]);
323 db.insertTranslator(var, i);
324 }
325
326 if (useLabels) {
328 for (const auto& row: _database_) {
329 for (Idx i = 0; i < _nbVars_; ++i) {
330 const Idx j = _varOrder_.at(i);
331 xrow[i] = _label_(row, _bn_.variable(j), j);
332 }
333 db.insertRow(xrow);
334 }
335 } else {
337 for (std::size_t i = 0; i < _nbVars_; ++i) {
338 translatorType[i] = db.translator(i).getValType();
339 }
342 for (const auto& row: _database_) {
343 for (Idx i = 0; i < _nbVars_; ++i) {
344 const Idx j = _varOrder_.at(i);
346 xrow[i].discr_val = std::size_t(row.at(j));
347 else xrow[i].cont_val = float(row.at(j));
348 }
349 }
350 db.insertRow(xrow, xmiss);
351 }
352
353 return db;
354 }

References _bn_, _database_, _drawnSamples_, _label_(), _names2ids_, _nbVars_, _varOrder_, gum::learning::DISCRETE, gum::learning::DBTranslator::getValType(), GUM_ERROR, gum::learning::DatabaseTable::insertRow(), gum::learning::DatabaseTable::insertTranslator(), and gum::learning::DatabaseTable::translator().

Here is the call graph for this function:

◆ varOrder()

template<typename GUM_SCALAR>
std::vector< Idx > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::varOrder ( ) const

returns variable order indexes

Definition at line 445 of file BNDatabaseGenerator_tpl.h.

445 {
446 return _varOrder_;
447 }

References _varOrder_.

Referenced by _varOrderFromCSV_(), _varOrderFromCSV_(), setAntiTopologicalVarOrder(), setRandomVarOrder(), setTopologicalVarOrder(), setVarOrder(), setVarOrder(), and toCSV().

Here is the caller graph for this function:

◆ varOrderNames()

template<typename GUM_SCALAR>
std::vector< std::string > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::varOrderNames ( ) const

returns variable order.

Definition at line 451 of file BNDatabaseGenerator_tpl.h.

451 {
453 varNames.reserve(_nbVars_);
454 for (const auto& i: _varOrder_) {
455 varNames.push_back(_names2ids_.first(i));
456 }
457
458 return varNames;
459 }

References _names2ids_, _nbVars_, and _varOrder_.

Member Data Documentation

◆ _bn_

template<typename GUM_SCALAR>
const BayesNet< GUM_SCALAR >& gum::learning::BNDatabaseGenerator< GUM_SCALAR >::_bn_
private

◆ _database_

template<typename GUM_SCALAR>
std::vector< std::vector< Idx > > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::_database_
private

generated database

Definition at line 225 of file BNDatabaseGenerator.h.

Referenced by database(), drawSamples(), samplesAt(), samplesLabelAt(), samplesNbRows(), toCSV(), and toDatabaseTable().

◆ _discretizedLabelMode_

◆ _drawnSamples_

template<typename GUM_SCALAR>
bool gum::learning::BNDatabaseGenerator< GUM_SCALAR >::_drawnSamples_ = false
private

whether drawSamples has been already called.

Definition at line 231 of file BNDatabaseGenerator.h.

Referenced by database(), drawSamples(), log2likelihood(), samplesAt(), samplesLabelAt(), samplesNbCols(), samplesNbRows(), toCSV(), and toDatabaseTable().

◆ _log2likelihood_

template<typename GUM_SCALAR>
double gum::learning::BNDatabaseGenerator< GUM_SCALAR >::_log2likelihood_ = 0
private

log2Likelihood of generated samples

Definition at line 234 of file BNDatabaseGenerator.h.

Referenced by drawSamples(), and log2likelihood().

◆ _names2ids_

template<typename GUM_SCALAR>
Bijection< std::string, NodeId > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::_names2ids_
private

bijection nodes names

Definition at line 219 of file BNDatabaseGenerator.h.

Referenced by BNDatabaseGenerator(), _varOrderFromCSV_(), setVarOrder(), toDatabaseTable(), and varOrderNames().

◆ _nbVars_

◆ _varOrder_

template<typename GUM_SCALAR>
std::vector< Idx > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::_varOrder_
private

variable order in generated database

Definition at line 228 of file BNDatabaseGenerator.h.

Referenced by BNDatabaseGenerator(), database(), samplesAt(), samplesLabelAt(), setVarOrder(), toCSV(), toDatabaseTable(), varOrder(), and varOrderNames().

◆ onProgress

Signaler2< Size, double > gum::ProgressNotifier::onProgress
inherited

Progression (percent) and time.

Definition at line 67 of file progressNotification.h.

Referenced by gum::learning::BNDatabaseGenerator< GUM_SCALAR >::drawSamples().

◆ onStop

Signaler1< const std::string& > gum::ProgressNotifier::onStop
inherited

with a possible explanation for stopping

Definition at line 70 of file progressNotification.h.

Referenced by gum::learning::BNDatabaseGenerator< GUM_SCALAR >::drawSamples().


The documentation for this class was generated from the following files: